Created
August 16, 2020 12:56
-
-
Save arghyadeep99/8467a58c08e47070b80f926861b20eee to your computer and use it in GitHub Desktop.
Decision Trees Implementation.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "name": "Decision Trees Implementation.ipynb", | |
| "provenance": [], | |
| "collapsed_sections": [], | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/arghyadeep99/8467a58c08e47070b80f926861b20eee/decision-trees-implementation.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "I6qKGWQz4i3b", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "# Decision Trees\n", | |
| "“The possible solutions to a given problem emerge as the leaves of a tree, each node representing a point of deliberation and decision.”\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "fdeQCp-yHtL7", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
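| "A Decision Tree behaves like a sophisticated chain of \"if-else\" rules. To see this concretely, read [this Stack Overflow thread on extracting the decision rules from a scikit-learn decision tree](https://stackoverflow.com/questions/20224526/how-to-extract-the-decision-rules-from-scikit-learn-decision-tree)." | |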
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "EYd2O_Lm5BcF", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "from sklearn import datasets\n", | |
| "from sklearn.tree import DecisionTreeClassifier" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "DdxxbmYG5JyE", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "iris = datasets.load_iris()" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "0HuImXCiaEZK", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "x_setosa = iris['data'][:50, (2)]\n", | |
| "y_setosa = iris['data'][:50, (3)]\n", | |
| "x_versicolor = iris['data'][50:100, (2)]\n", | |
| "y_versicolor = iris['data'][50:100, (3)]\n", | |
| "x_virginica = iris['data'][100:150, (2)]\n", | |
| "y_virginica = iris['data'][100:150, (3)]\n" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "mlOnK2fMcaoo", | |
| "colab_type": "code", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 298 | |
| }, | |
| "outputId": "959d86a3-5708-499e-a793-cc9f28013289" | |
| }, | |
| "source": [ | |
| "import matplotlib.pyplot as plt\n", | |
| "\n", | |
| "plt.xlabel('petal_length')\n", | |
| "plt.ylabel('petal_width')\n", | |
| "plt.plot(x_setosa,y_setosa, 'ro') #plotting setosa flower\n", | |
| "plt.plot(x_versicolor,y_versicolor, 'go') #plotting versicolor flower\n", | |
| "plt.plot(x_virginica,y_virginica, 'yo')" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "[<matplotlib.lines.Line2D at 0x7f0129fca2e8>]" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 6 | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "image/png": "\n", | |
| "text/plain": [ | |
| "<Figure size 432x288 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [], | |
| "needs_background": "light" | |
| } | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "oe45ucCrzott", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "**Decision Boundary**\n", | |
| "\n", | |
| "Decision Trees divide the input space into axis-parallel rectangles\n", | |
| "and label each rectangle with one of the K classes.\n", | |
| "\n", | |
| "\n", | |
| "\n", | |
| "[Decision boundaries on the iris dataset (scikit-learn example)](https://scikit-learn.org/stable/auto_examples/tree/plot_iris_dtc.html)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "wAVSZkpezhpa", | |
| "colab_type": "code", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 315 | |
| }, | |
| "outputId": "34e79ae0-9ead-41b5-ec0c-4ca555ef6972" | |
| }, | |
| "source": [ | |
| "print('Decision Boundary')\n", | |
| "plt.xlabel('petal_length')\n", | |
| "plt.ylabel('petal_width')\n", | |
| "plt.plot(x_setosa,y_setosa, 'ro') #plotting setosa flower\n", | |
| "plt.plot(x_versicolor,y_versicolor, 'go') #plotting versicolor flower\n", | |
| "plt.plot(x_virginica,y_virginica, 'yo')\n", | |
| "plt.plot([0,7], [0.8, 0.8], 'k-', lw=2)\n", | |
| "plt.plot([0,7], [1.75, 1.75], 'k-', lw=2)" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "Decision Boundary\n" | |
| ], | |
| "name": "stdout" | |
| }, | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "[<matplotlib.lines.Line2D at 0x7f01299cb630>]" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 11 | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "image/png": "\n", | |
| "text/plain": [ | |
| "<Figure size 432x288 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [], | |
| "needs_background": "light" | |
| } | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "6eb3gp-qgLJy", | |
| "colab_type": "code", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 88 | |
| }, | |
| "outputId": "7e06c850-e2e7-4ab1-945b-eede9b61e8fe" | |
| }, | |
| "source": [ | |
| "iris.feature_names" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "['sepal length (cm)',\n", | |
| " 'sepal width (cm)',\n", | |
| " 'petal length (cm)',\n", | |
| " 'petal width (cm)']" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 8 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "gtKZzGge5OxQ", | |
| "colab_type": "code", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "X = iris.data[:,2:]\n", | |
| "y = iris.target" | |
| ], | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "id": "APiN_J_-5cL7", | |
| "colab_type": "code", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 124 | |
| }, | |
| "outputId": "9f8234e1-3e2d-400a-e7a3-ab335ecb6aae" | |
| }, | |
| "source": [ | |
| "tree_clf = DecisionTreeClassifier(max_depth=2)\n", | |
| "tree_clf.fit(X, y)" | |
| ], | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',\n", | |
| " max_depth=2, max_features=None, max_leaf_nodes=None,\n", | |
| " min_impurity_decrease=0.0, min_impurity_split=None,\n", | |
| " min_samples_leaf=1, min_samples_split=2,\n", | |
| " min_weight_fraction_leaf=0.0, presort='deprecated',\n", | |
| " random_state=None, splitter='best')" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 10 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "MVRLrlcj34rj", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "**Task For You**\n", | |
| "\n", | |
| "Implement a Decision Tree from scratch, using either the Gini index or entropy as the impurity metric." | |
| ] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment