Skip to content

Instantly share code, notes, and snippets.

@neoyipeng2018
Last active March 21, 2021 21:28
Show Gist options
  • Select an option

  • Save neoyipeng2018/73b61bec5e7034a92ea0a4e393202f7d to your computer and use it in GitHub Desktop.

Select an option

Save neoyipeng2018/73b61bec5e7034a92ea0a4e393202f7d to your computer and use it in GitHub Desktop.
Using Embeddings for Financial Sentiment Classification
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Using Embeddings for Financial Sentiment Classification",
"provenance": [],
"collapsed_sections": [
"7jk8iwKC57b7",
"IjnvPbcXUmbz",
"5toz2xc6UeNd",
"Jq4Vqx0g6fK-"
],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"b39d76fce7e346dcb785811bc331b440": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_0a2bc9b24529487da58592a7b3b581c8",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_e0baf84f88184887b39f9b204e73d033",
"IPY_MODEL_18b4e117c6924028aa2a5eab572a10ad"
]
}
},
"0a2bc9b24529487da58592a7b3b581c8": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"e0baf84f88184887b39f9b204e73d033": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_2762aefdcf3644fe8e892090731d220d",
"_dom_classes": [],
"description": "100%",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 1811,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 1811,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_c9617ce14cd74ec79a76d3a9bfff29b2"
}
},
"18b4e117c6924028aa2a5eab572a10ad": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_b6b486660a6347298f9b59601236c073",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 1811/1811 [02:39<00:00, 11.37it/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_4d766770c27948c4a22920e9fa397ba6"
}
},
"2762aefdcf3644fe8e892090731d220d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "initial",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"c9617ce14cd74ec79a76d3a9bfff29b2": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"b6b486660a6347298f9b59601236c073": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"4d766770c27948c4a22920e9fa397ba6": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"bfb1e90fc662469dac770e832e072917": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_90241d13a80545788bfc5aec35076748",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_d04a55472f7b43538fce43ec6cbc15b8",
"IPY_MODEL_63ba97555ef941218636097d9f920f31"
]
}
},
"90241d13a80545788bfc5aec35076748": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"d04a55472f7b43538fce43ec6cbc15b8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_bd84a3f1dea84a82ab7da822472cc088",
"_dom_classes": [],
"description": "100%",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 453,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 453,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_c7c8cf83c7ae4eefb3fce43172bcc59b"
}
},
"63ba97555ef941218636097d9f920f31": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_922c377294ed4f4cbd97474fbfa607d0",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 453/453 [00:38<00:00, 11.72it/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_fa12a4671e7c49ccadf380d966ee4be1"
}
},
"bd84a3f1dea84a82ab7da822472cc088": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "initial",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"c7c8cf83c7ae4eefb3fce43172bcc59b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"922c377294ed4f4cbd97474fbfa607d0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"fa12a4671e7c49ccadf380d966ee4be1": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/neoyipeng2018/73b61bec5e7034a92ea0a4e393202f7d/magnitudephrasebank.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "gbjyDfFc_W_2",
"colab_type": "text"
},
"source": [
"# Using Embeddings for Financial Sentiment Classification\n",
"Adapted from this example notebook: https://colab.research.google.com/drive/1lOcAhIffLW8XC6QsKzt5T_ZqPP4Y9eS4\n",
"\n",
"Note: the original example is quite old (Python 2!) and its Keras model doesn't generalize well. So instead I average the word vectors of each sentence and train a good ol' Random Forest on the averaged vectors."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7jk8iwKC57b7",
"colab_type": "text"
},
"source": [
"## Setup"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nO-VAs78CnJI",
"colab_type": "text"
},
"source": [
"First, we'll install some dependencies and download the Magnitude file we wish to use."
]
},
{
"cell_type": "code",
"metadata": {
"id": "dsIvcCA5_FBI",
"colab_type": "code",
"colab": {}
},
"source": [
"!pip install -q pymagnitude #tensorflow keras\n",
"#glove: !curl -s http://magnitude.plasticity.ai/glove+subword/glove.6B.50d.magnitude --output vectors.magnitude\n",
"#word2vec: !curl -s http://magnitude.plasticity.ai/word2vec+subword/GoogleNews-vectors-negative300.magnitude --output vectors.magnitude\n",
"#fastText: !curl -s http://magnitude.plasticity.ai/fasttext+subword/wiki-news-300d-1M.magnitude --output vectors.magnitude\n",
"#elmo light: !curl -s http://magnitude.plasticity.ai/elmo/light/elmo_2x1024_128_2048cnn_1xhighway_weights.magnitude --output vectors.magnitude\n",
"!curl -s http://magnitude.plasticity.ai/elmo/light/elmo_2x1024_128_2048cnn_1xhighway_weights.magnitude --output vectors.magnitude"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "95Xg9EyU-ZYr",
"colab_type": "text"
},
"source": [
"Next, we'll import what we need to create our model and define some hyperparameters."
]
},
{
"cell_type": "code",
"metadata": {
"id": "1ODh9a4szHt6",
"colab_type": "code",
"colab": {}
},
"source": [
"from pymagnitude import *\n",
"\n",
"MAX_WORDS = 30 # The maximum number of words the sequence model will consider\n",
"vectors = Magnitude('./vectors.magnitude', pad_to_length = MAX_WORDS)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "IjnvPbcXUmbz",
"colab_type": "text"
},
"source": [
"## Getting Data"
]
},
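{
"cell_type": "markdown",
"metadata": {
"id": "AWllqkJDx_z7",
"colab_type": "text"
},
"source": [
"Download the Financial PhraseBank dataset from https://www.researchgate.net/publication/251231364_FinancialPhraseBank-v10 and make `Sentences_AllAgree.txt` available in the working directory. I'm keeping it on my Google Drive for convenience."
]
},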
{
"cell_type": "code",
"metadata": {
"id": "H1fnkEco1Hyh",
"colab_type": "code",
"outputId": "b25130f7-85bc-401e-ac28-2a2826592604",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 139
}
},
"source": [
"from google.colab import drive\n",
"drive.mount('/content/gdrive', force_remount=True)\n",
"%cd '/content/gdrive/My Drive/Colab Notebooks'"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly\n",
"\n",
"Enter your authorization code:\n",
"··········\n",
"Mounted at /content/gdrive\n",
"/content/gdrive/My Drive/Colab Notebooks\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "aRNHigUG4mI3",
"colab_type": "code",
"colab": {}
},
"source": [
"#download from https://www.researchgate.net/publication/251231364_FinancialPhraseBank-v10\n",
"f = open(\"Sentences_AllAgree.txt\", \"r\",encoding = \"ISO-8859-1\")\n",
"test_file = f.read().split('\\n')"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "BcpdkqEKJbEK",
"colab_type": "code",
"colab": {}
},
"source": [
"from tqdm import notebook\n",
"import pandas as pd"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "E9ksIuT1Jfsh",
"colab_type": "code",
"colab": {}
},
"source": [
"def avg_vec(df):\n",
" vctrLs = []\n",
" for txt in notebook.tqdm(df.text.values): vctrLs.append(np.average(vectors.query(txt.split(' ')), axis = 0))\n",
" return np.array(vctrLs)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Ak3pY1PLJfor",
"colab_type": "code",
"outputId": "5f22ff96-5fb6-47e3-98f4-eebf10f1140e",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 111
}
},
"source": [
"df=pd.read_csv('Sentences_AllAgree.txt',encoding = \"ISO-8859-1\", names=['text','sentiment'], delimiter= '@')\n",
"df.dropna(inplace=True)\n",
"df.head(2)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>text</th>\n",
" <th>sentiment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>According to Gran , the company has no plans t...</td>\n",
" <td>neutral</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>For the last quarter of 2010 , Componenta 's n...</td>\n",
" <td>positive</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" text sentiment\n",
"0 According to Gran , the company has no plans t... neutral\n",
"1 For the last quarter of 2010 , Componenta 's n... positive"
]
},
"metadata": {
"tags": []
},
"execution_count": 78
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "txX9T_6DJyH3",
"colab_type": "code",
"colab": {}
},
"source": [
"train=df.sample(frac=0.8,random_state=42)\n",
"test=df.drop(train.index)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "R1--_SZCMZuz",
"colab_type": "code",
"outputId": "7ab097a9-c019-4374-e67b-818b8f759b8f",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 149,
"referenced_widgets": [
"b39d76fce7e346dcb785811bc331b440",
"0a2bc9b24529487da58592a7b3b581c8",
"e0baf84f88184887b39f9b204e73d033",
"18b4e117c6924028aa2a5eab572a10ad",
"2762aefdcf3644fe8e892090731d220d",
"c9617ce14cd74ec79a76d3a9bfff29b2",
"b6b486660a6347298f9b59601236c073",
"4d766770c27948c4a22920e9fa397ba6",
"bfb1e90fc662469dac770e832e072917",
"90241d13a80545788bfc5aec35076748",
"d04a55472f7b43538fce43ec6cbc15b8",
"63ba97555ef941218636097d9f920f31",
"bd84a3f1dea84a82ab7da822472cc088",
"c7c8cf83c7ae4eefb3fce43172bcc59b",
"922c377294ed4f4cbd97474fbfa607d0",
"fa12a4671e7c49ccadf380d966ee4be1"
]
}
},
"source": [
"xTrn,xTest=avg_vec(train),avg_vec(test)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b39d76fce7e346dcb785811bc331b440",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, max=1811.0), HTML(value='')))"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/pymagnitude/third_party/allennlp/nn/util.py:116: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than tensor.new_tensor(sourceTensor).\n",
" index_range = sequence_lengths.new_tensor(torch.arange(0, len(sequence_lengths)))\n"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "bfb1e90fc662469dac770e832e072917",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, max=453.0), HTML(value='')))"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "5toz2xc6UeNd",
"colab_type": "text"
},
"source": [
"## Modelling - Good Ol Random Forest"
]
},
{
"cell_type": "code",
"metadata": {
"id": "4yO5BE7bJa5c",
"colab_type": "code",
"colab": {}
},
"source": [
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.metrics import confusion_matrix"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "TvfwUeozJXIT",
"colab_type": "code",
"colab": {}
},
"source": [
"forest = RandomForestClassifier(n_estimators=100, random_state=0, max_features=0.5, \n",
" max_depth=4 ,min_samples_split=5,\n",
" oob_score=True, n_jobs=-1, min_samples_leaf=50)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "JsOZkLFKPCnl",
"colab_type": "code",
"colab": {}
},
"source": [
"def oneHot(row):\n",
" if row=='negative': return -1\n",
" if row=='neutral' : return 0\n",
" if row=='positive' : return +1"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "y6cgq8bePWY4",
"colab_type": "code",
"colab": {}
},
"source": [
"train.sentiment=train.sentiment.apply(oneHot)\n",
"test.sentiment =test.sentiment.apply(oneHot)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "O8W73LKmOsrx",
"colab_type": "code",
"outputId": "b928d677-d679-4beb-ad1d-0ac372ebe785",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 153
}
},
"source": [
"forest.fit(xTrn, train.sentiment)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,\n",
" criterion='gini', max_depth=4, max_features=0.5,\n",
" max_leaf_nodes=None, max_samples=None,\n",
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
" min_samples_leaf=50, min_samples_split=5,\n",
" min_weight_fraction_leaf=0.0, n_estimators=100,\n",
" n_jobs=-1, oob_score=True, random_state=0, verbose=0,\n",
" warm_start=False)"
]
},
"metadata": {
"tags": []
},
"execution_count": 127
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "eg-K2b3LPd3K",
"colab_type": "code",
"outputId": "0a0a0766-30ae-4806-93bf-034be834690e",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
}
},
"source": [
"print(\"Accuracy on training set: {:.3f}\".format(forest.score(xTrn, train.sentiment)))\n",
"oldscore = forest.oob_score_\n",
"print(f'OOB score is {oldscore*100:.1f}%')\n",
"#print('Out-of-bag score estimate: {:.3}'.format())"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Accuracy on training set: 0.795\n",
"OOB score is 76.7%\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Tf8FUNBJRtzK",
"colab_type": "code",
"colab": {}
},
"source": [
"import matplotlib.pyplot as plt"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "AvRNn5IZPjxH",
"colab_type": "code",
"outputId": "2ce3834c-ab94-481b-fac1-6b28dc3676ab",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 331
}
},
"source": [
"y_predict = forest.predict(xTest)\n",
"confusion_matrix(test.sentiment, y_predict)\n",
"\n",
"\n",
"cm = confusion_matrix(test.sentiment, y_predict)\n",
"print(\"Confusion matrix:\\n{}\".format(cm))\n",
"\n",
"\n",
"#Show confusion matrix in a separate window\n",
"plt.matshow(cm)\n",
"plt.title('Confusion matrix')\n",
"plt.colorbar()\n",
"plt.ylabel('True label')\n",
"plt.xlabel('Predicted label')\n",
"\n",
"\n",
"fmt = '.2f' #if normalize else 'd'\n",
"plt.show()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Confusion matrix:\n",
"[[ 7 13 41]\n",
" [ 0 280 6]\n",
" [ 2 40 64]]\n"
],
"name": "stdout"
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAQwAAAD2CAYAAAAj8rlYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAYvUlEQVR4nO3de7RcZX3G8e+TAAkEiMaEGAMI1qhNaUGacm0xQlWgl2BXRYQqtXShFbyBq4K1QrW0rrZeqgI2CDWIBqGAREECUllIF2gSjEgSkJRLSQiEAELkmnPOr3/sd2QI58y8c5iZPXvm+ay1V2ZfZr/vHJjfvPetiMDMLMeEsjNgZtXhgGFm2RwwzCybA4aZZXPAMLNsDhhmls0Bo2SStpf0XUmPS7r0JdznOEnXtjNvZZH0B5LuLDsf9mLyOIw8ko4FTgHeAGwGVgJnRcRNL/G+7wY+CBwUEUMvOaM9TlIAcyJibdl5sda5hJFB0inAF4F/AmYCuwPnAAvacPtXA78YhGCRQ9I2ZefBGogIbw02YCrwK+AdDa6ZRBFQHkjbF4FJ6dx8YB1wKrAR2AC8N537B+A5YEtK4wTgTOCiunvvAQSwTdr/S+BuilLOPcBxdcdvqnvfQcAy4PH070F1524APgP8T7rPtcD0MT5bLf9/W5f/o4AjgV8AjwKfqLt+P+Bm4Jfp2q8A26VzN6bP8mT6vO+su//HgQeBb9SOpff8Rkpj37T/KuBhYH7Z/2+0Y3vr/B3id39nUtYGXFN2fh3NmzsQmAxc0eCavwMOAPah+EJcCXwS+Pt0/pUUgWc28BbgvyR9JyLOSEX010bEXwBIOnOsRCRNAb4E/F5E3ClpFjBtlOumAVcBHwIWA+8ArpL02oh4JF12LHAEcD/wfeBjwGljJP3K9DeYTRGYzgOuA36XorS1XNLiiLgHGAY+CiwHdk33/gDwxYg4JH3evSNVSSTNT/efRlHamgDsX0s4Iv5X0seBiyTNA/4TWBQRN4z1d6qSTY8O8+Olu2Zdu+2s/53e4ew05SpJc68ANkXjKsNxwKcjYmNEPExRcnh33fkt6fyWiLia4tf19ePMzwiwl6TtI2JDRKwa5Zo/Au6KiG9ExFBELAbuAP6k7pr/jIhfRMTTwCUUwW4sWyjaa7YAFwPTgX+PiM0p/dXA3gARsSIibknp3gv8B/CmjM90RkQ8m/LzAhFxHrAW+DEwiyJA94lgOEaytl7ggNHcI8D0JnXrVwH31e3fl479+h5bBZyngB1bzUhEPElRjH8/sEHSVZLekJGfWp5m1+0/2EJ+HomI4fS69oV+qO7807X3S3qdpO9JelDSExTtPs1+GR+OiGeaXHMesBfw5Yh4tsm1lRHACJG19QIHjOZuBp6lqLeP5QGK4nTN7unYeDwJ7FC3/8r6kxGxNCLeQvFLewfFF6lZfmp5Wj/OPLXiXIp8zYmInYFPAGrynobfBkk7UrQLnQ+cmapcfSEItsRw1tYLBjpgSDpc0p2S1koatf4eEY8DnwLOlnSUpB0kbSvpCEn/ki5bDHxS0gxJ09P1F40zWyuBQyTtLmkqcHpdfmdKWpDaMp6lqNqMVla9GnidpGMlbSPpnRTtDf8q6fZx5ivXTsATwK9S6edvtjr/EPCaFu/578DyiPhriraZrza6WNJukn4oabWkVZI+3GJ6XeUSRgVImgicTdHwNxd4l6S5o10bEZ+jGIPxSYoW+vuBk4HvpEv+kaKR7zbg58Ct6VjLIuI64NvpXiuA79WdnpDy8QBFz8GbePEXktSw+ccUPTOPUPRwnAK8bTx5atHHKBpUN1OUfr691fkzgUWSfinp6GY3k7QAOJznP+cpwL6SjmvwtiHg1IiYS9EYfdJY/23LFsAwkbX1goEduCXpQODMiHhb2j8dICL+udSMdZCkPYDvRcReJWelqyRdCXwlBeOess/e28V135+Rde0usx9YER
HzOpylhga5W3U2RUmhZh113XnWH1KQfCNFD0vPCWC4Qj/agxwwrM+lxtLLgI9ExBNl52csvdFhmmeQA8Z6YLe6/V3pTi+CdYGkbSmCxTcj4vKy8zOW6KH2iRyDHDCWAXMk7UkRKI6haKyzipMkii7YNRHx+bLz00gEbKlOvBjcXpI0kOpkYCmwBrhkjFGTfUHSYooxJa+XtE7SCWXnqYMOphhpe6iklWk7suxMjU4MZ269YJBLGKRh2leXnY9uiIh3lZ2HboliyYHe+IY1EcBIhUoYAx0wzHpBr5QecjhgmJWoGLjlgGFmmUbCAcPMMriEYWbZArElJpadjWwD261aI+nEsvPQTYP0eavwWWsljKp0qw58wAB6/n+qNhukz1uBzyqGY0LW1vROY0zrl3SmpPWjjUmRdHpa3uFOSU1nM7tKYlaiYsWttv1u16b13yppJ2CFpNoM3S9ExL/VX5ym/B8D/BbFKm0/kPS6utXVXqSnAsZ2mhzba0pX05ysKUyd8Ipyhs5M6H4Bb/KEKUzdZkbXP29M2rbbSTJ5u6nsPOVVXf+szzz7S54beiq7DtGu6kZEbKBYqZ2I2CxpDS9clnFrC4CL05KH90hay/Orvo+qpwLG9prCAZN7dARvB2jypLKz0DUje+atjN0PbrljYfa1EcqqbrRqq2n9BwMnS3oPxUJPp0bEYxTB5Ja6t62jcYBxG4ZZ2UZQ1kaxGPXyum3UNppRpvWfS/F8l30oSiCfG29ee6qEYTZoAvFcZH8NNzVbcWu0af0R8VDd+fN4ftnHlpd4cAnDrES1Rs+crZmxpvWnB17VvB2oLQS9BDhG0qS0zMMc4CeN0nAJw6xkw+0bGl6b1v9zSSvTsU9QLHBdeyrfvcD7ACJilaRLKB5ENQSc1KiHBBwwzEoViOE2FfQbTOsfcwmHiDgLOCs3DQcMs5KNdKCXpFMcMMxKVAwNd8AwswxVm3zmgGFWogg6MnCrUxwwzEr160FZleCAYVai4slnLmGYWSY3eppZlkBe09PM8rmEYWZZ3K1qZtmKJ5+5hGFmmXplgd8cDhhmJYqQSxhmls/jMMwsS7GAjqskZpalM4sAd4oDhlmJAtytamZ5PNLTzFrSxiefdVxHcyrp8PTMxrWSTutkWmZVVKyHoaytF3SshCFpInA28BaKJyotk7QkIlZ3Kk2zKnKVpLAfsDYi7gaQdDHFsxwdMMySog2jOlWSTgaM2cD9dfvrgP07mJ5ZJXloeAvS8yFPhOJJ6maDJBBDI+5WhcznNkbEQmAhwNQJr4gO5sesJ3mkZ2EZMCc9s3E9cAxwbAfTM6ucWi9JVXQsYETEkKSTgaXAROCCiFjVqfTMqsqNnklEXE2D5zqaDTqP9DSzlrgNw8yyFEv0OWCYWY6oVrdqdVpbzPpQbQGdnK0ZSbtJ+qGk1ZJWSfpwOj5N0nWS7kr/vjwdl6Qvpblet0nat1kaDhhmJRsJZW0ZhoBTI2IucABwkqS5wGnA9RExB7g+7QMcAcxJ24nAuc0ScMAwK1GtDaMdASMiNkTEren1ZmANxRSNBcCidNki4Kj0egFwYRRuAV4maVajNNyGYVayFho9p0taXre/MI2UfhFJewBvBH4MzIyIDenUg8DM9Hq0+V6zgQ2MwQHDrEQtjsPYFBHzml0kaUfgMuAjEfGE9Pz9IyIkjXsKhgOGWZkChto40lPSthTB4psRcXk6/JCkWRGxIVU5NqbjWfO96rkNw6xE7WzDUFGUOB9YExGfrzu1BDg+vT4euLLu+HtSb8kBwON1VZdRuYRhVrI2Dtw6GHg38HNJK9OxTwCfBS6RdAJwH3B0Onc1cCSwFngKeG+zBBwwzErUzrkkEXETjDlg47BRrg/gpFbScMAwK1l4aLiZ5fLkMzPLEuHJZ2aWTQyPVKez0gHDrGRuwzCzLF4Pw8zyRdGOURUOGGYlcy+JmWUJ3IZhZtm8ariZtWBkxAHDzDJEuEoybhHByDPPlJ2Nrl
l69y1lZ6FrDt9z/7Kz0D3PPtvS5a6SmFk2d6uaWTZXScwsSyAHDDPLV6EaiQOGWakCwt2qZpbLVRIzy9YXvSSSvkyD6lVEfKgjOTIbIP00l2R5g3Nm1g4B9EPAiIhF9fuSdoiIpzqfJbPBUqUqSdPFBCUdKGk1cEfa31vSOR3PmdmgiMytB+SsPvpF4G3AIwAR8TPgkE5mymxwiBjJ23pBVi9JRNxf/wRoYLgz2TEbMH04W/V+SQcBkZ4M/WFgTWezZTZAeqS6kSOnSvJ+iucvzgYeAPahxecxmlkjytzK17SEERGbgOO6kBezwdRPJQxJr5H0XUkPS9oo6UpJr+lG5swGQp/1knwLuASYBbwKuBRY3MlMmQ2MNPmsXb0kki5IP+y31x07U9J6SSvTdmTdudMlrZV0p6S3Nbt/TsDYISK+ERFDabsImJyVezNrrr0ljK8Dh49y/AsRsU/argaQNBc4Bvit9J5zJE1sdPMxA4akaZKmAd+XdJqkPSS9WtLfAldnZ9/MGgvlbTm3irgReDQz5QXAxRHxbETcA6wF9mv0hkaNniso4lotp++rzxdwemamzKwBdad94mRJ76GYI3ZqRDxG0fNZvxL1unRsTI3mkuzZjlyaWQOtVTemS6qfFLowIhZmvO9c4DMppc8AnwP+qoVc/lrWSE9JewFzqWu7iIgLx5OgmdXLr24AmyJiXqspRMRDv05NOg/4XtpdD+xWd+mu6diYcrpVzwC+nLY3A/8C/GlrWTazMXW4W1XSrLrdtwO1HpQlwDGSJknaE5gD/KTRvXJKGH8O7A38NCLeK2kmcFHr2TazUY2071aSFgPzKaov64AzgPmS9qEIO/eS2iMjYpWkS4DVwBBwUkQ0nCeWEzCejogRSUOSdgY28sJizFgZvwD4Y2BjROyVkY7Z4GnzAjoR8a5RDp/f4PqzgLNy758zDmO5pJcB51H0nNwK3Jzxvq8zen+wmdVR5G29IGcuyQfSy69KugbYOSJuy3jfjZL2eGnZMxsAPRIMcjRaBHjfRuci4tbOZMnMelWjEsbnGpwL4NB2ZEDSicCJAJPZoR23NKuUXqlu5Gg0cOvN3chAGniyEGBnTavQn86sTfpsxS0z65Sgrd2qnZbTSzIuqT/4ZuD1ktZJOqFTaZlVWV/1kozXGP3BZra1HgkGOXKGhkvSX0j6VNrfXVLDKbBm1oI+W3HrHOBAoFZi2Ayc3bEcmQ2Q3OpIlaok+0fEvpJ+ChARj0narsP5MhscfdZLsiUt2xUAkmZQqXZdsx7XI6WHHDkB40vAFcAuks6imL36yY7mymyAqEI/vzlzSb4paQVwGMVyfUdFhJ98ZtYOPdQ+kaNpwJC0O/AU8N36YxHxf53MmNnA6KeAAVzF84sBTwb2BO6kWJrczF6qfgoYEfHb9ftpFusHxrjczFpUpSpJy0PD07T2/TuQFzPrcTltGKfU7U4A9qV4iruZtUOFShg5bRg71b0eomjTuKwz2TEbMNFH3appwNZOEfGxLuXHbPD0QwlD0jYRMSTp4G5myGyQiGo1ejYqYfyEor1ipaQlwKXAk7WTEXF5h/NmNhj6JGDUTAYeoVjDszYeIwAHDLOXqo9Geu6Sekhu54VPcYdKxUSzHlehb1OjgDER2JEXBoqaCn1Es97WL70kGyLi013LidmgqtDPb6OAUZ1VPcyqqoeW38vRKGAc1rVcmA2wvmj0jIhHu5kRs4HVDwHDzLqjSiWMjj3IyMwytfExA5IukLRR0u11x6ZJuk7SXenfl6fjkvQlSWsl3dboAew1vVfC0OC0tR5x5LFlZ6FrnjlsStlZ6Jq46YbsazvwCIGvA18BLqw7dhpwfUR8VtJpaf/jwBHAnLTtD5xLk6UrXMIwK1sbSxgRcSOwdfvjAmBRer0IOKru+IVRuAV4maRZje7vgGFWsi48yGhmRGxIrx8EZqbXs4H7665bl46NqfeqJGaDJj8YTJe0vG5/YUQsbCmpiJDGH3
4cMMzKlv/13RQR88aRwkOSZkXEhlTl2JiOrwd2q7tu13RsTK6SmJWpO89WXQIcn14fD1xZd/w9qbfkAODxuqrLqFzCMCtbG3tJJC0G5lNUX9YBZwCfBS6RdAJwH3B0uvxq4EhgLcWzh97b7P4OGGYla+ds1Yh41xinXjTVIyICOKmV+ztgmJWsSiM9HTDMytRHs1XNrBscMMwsRz+tGm5m3eCAYWa5FNWJGA4YZmXqp0clmlkXVKeA4YBhVjY3eppZPgcMM8vSR49KNLNucMAwsxweuGVmLdFIdSKGA4ZZmTz5zMxa4YFbZpbPJQwzy+VGTzPLE0CFJp91bNVwSbtJ+qGk1ZJWSfpwp9IyqzKN5G29oJMljCHg1Ii4VdJOwApJ10XE6g6maVYpHoeRpOcbbEivN0taQ/EYNgcMs5qISlVJutKGIWkP4I3Aj7uRnlmVuIRRR9KOwGXARyLiiVHOnwicCDCZHTqdHbPe44BRkLQtRbD4ZkRcPto16WGyCwF21rQK/enM2sMlDECSgPOBNRHx+U6lY1ZpAVRoLkknH8Z8MPBu4FBJK9N2ZAfTM6skd6sCEXETRa+RmTXiXhIzy+U2DDPL4+ntZparGOlZnYjhgGFWtjY2aEq6F9gMDANDETFP0jTg28AewL3A0RHx2Hju38leEjPLoIisrQVvjoh9ImJe2j8NuD4i5gDXp/1xccAwK1NEMQ4jZxu/BcCi9HoRcNR4b+SAYVYyRd6WKYBrJa1I0y4AZqbJoAAPAjPHm1e3YZiVLb+6MV3S8rr9hWlqRb3fj4j1knYBrpN0xwuTipDG35HrgGFWptae3r6prl1i9NtFrE//bpR0BbAf8JCkWRGxQdIsYON4s+sqiVnZamtiNNuakDQlLVaFpCnAW4HbgSXA8emy44Erx5tVlzDMyta+YRgzgSuKeZ9sA3wrIq6RtAy4RNIJwH3A0eNNwAHDrGTtGrgVEXcDe49y/BHgsHak4YBhVqYAhj3S08wyiJYHZZXKAcOsbA4YZpbNAcPMsgRtnXzWaQ4YZiVzG4aZ5XPAMLMsETBSnTqJA4ZZ2aoTLxwwzMrmNgwzy+eAYWZZKvbks54KGJt5bNMPRi69r8vJTgc2dTnNwk9LSbWczztInxVenX9p3tT1XtFTASMiZnQ7TUnLmy1K0k8G6fNW5rM6YJhZlgCGq9NN4oBhVqqAcMCokq0XUe13g/R5q/FZK1QlGfg1PUdZdbmtJA1LWinpdkmXStrhJdzr65L+PL3+mqS5Da6dL+mgrY83+7yS7pU0Pff4Vtf8qtH5Ua4/U9LHWnlPKzr937Ytar0knX0uSdsMfMDogqfTU6j2Ap4D3l9/UtK4SnkR8dcRsbrBJfOBFwUM60FtWgS4GxwwuutHwGvTr/+PJC0BVkuaKOlfJS2TdJuk9wGo8BVJd0r6AbBL7UaSbpA0L70+XNKtkn4m6XpJe1AEpo+m0s0fSJoh6bKUxjJJB6f3vkLStZJWSfoaxfOBG5L0nfSgnFV1D8upnftCOn69pBnp2G9Iuia950eS3tCOP2bfqFDAcBtGl6SSxBHANenQvsBeEXFP+tI9HhG/J2kS8D+SrgXeCLwemEuxIvRq4IKt7jsDOA84JN1rWkQ8KumrwK8i4t/Sdd8CvhARN0naHVgK/CZwBnBTRHxa0h8BJ2R8nL9KaWwPLJN0WVpodgqwPCI+KulT6d4nU7QlvD8i7pK0P3AOcOg4/oz9JwKGh8vORTYHjM7bXtLK9PpHwPkUVYWfRMQ96fhbgd+ptU8AU4E5wCHA4ogYBh6Q9N+j3P8A4MbavSLi0THy8YfA3LQEPcDOknZMafxZeu9VknKe6v0hSW9Pr3dLeX2EYhrVt9Pxi4DLUxoHAZfWpT0pI43B0SOlhxwOGJ33dETsU38gfXGerD8EfDAilm513ZFtzMcE4ICIeGaUvGSTNJ8i+BwYEU9JugGYPMblkdL95dZ/A6tToYDhNozesBT4G0nbAk
h6XXpy1Y3AO1MbxyzgzaO89xbgEEl7pvdOS8c3AzvVXXct8MHajqTaF/hG4Nh07Ajg5U3yOhV4LAWLN1CUcGomALVS0rEUVZ0ngHskvSOlIUkvenbG4OrK09vbxgGjN3yNon3iVkm3A/9BUfq7ArgrnbsQuHnrN0bEw8CJFMX/n/F8leC7wNtrjZ7Ah4B5qVF1Nc/31vwDRcBZRVE1+b8meb0G2EbSGuCzFAGr5klgv/QZDgU+nY4fB5yQ8rcKWJDxNxkMAREjWVsvUFSoOGTWb6ZuMyMO3PmorGuXPva1FWXPjXEbhlnZKvSj7YBhViZ3q5pZK8KLAJtZnt4ZxZnDAcOsTBVbos/dqmZli5G8LUOaV3SnpLWSTmt3Vl3CMCtRANGmEoakicDZwFuAdRTzfJY0mdXcEpcwzMoU0c4Sxn7A2oi4OyKeAy6mzYPkXMIwK1m0r1t1NnB/3f46YP923RwcMMxKtZnHlv4g/qvhSmZ1JktaXre/sNurijlgmJUoIg5v4+3WUyw3ULNrOtY2bsMw6x/LgDmS9pS0HXAMsKSdCbiEYdYnImJI0skUyyVMBC6IiFXtTMOzVc0sm6skZpbNAcPMsjlgmFk2Bwwzy+aAYWbZHDDMLJsDhpllc8Aws2z/D3pim9MC48yhAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 288x288 with 2 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "lA3dXqDuOsom",
"colab_type": "code",
"outputId": "e560d9b7-145b-4ccd-f423-74ede2b43098",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"print(\"Accuracy on test set: {:.3f}\".format(forest.score(xTest, test.sentiment)))"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Accuracy on test set: 0.775\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Jq4Vqx0g6fK-",
"colab_type": "text"
},
"source": [
"## Predicting with the Model"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "kjwQm2Sryj5M",
"colab_type": "text"
},
"source": [
"Since the model has been trained successfully, we can evaluate its performance on some test queries using Magnitude to convert the test queries into a sequence of vectors that can be passed directly into the model for inference (prediction)."
]
},
{
"cell_type": "code",
"metadata": {
"id": "n4U11gR9TuxF",
"colab_type": "code",
"outputId": "ee747375-61af-48a9-a5d9-8f08e37c3d81",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"test.text.values[0]"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"\"For the last quarter of 2010 , Componenta 's net sales doubled to EUR131m from EUR76m for the same period a year earlier , while it moved to a zero pre-tax profit from a pre-tax loss of EUR7m .\""
]
},
"metadata": {
"tags": []
},
"execution_count": 159
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "cBJTvJsoT0EP",
"colab_type": "code",
"outputId": "338bac31-c7e5-4d5c-bf8c-88096b6d1067",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"x=[np.average(vectors.query(test.text.values[0].split(' ')), axis = 0)]\n",
"forest.predict(x)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([1])"
]
},
"metadata": {
"tags": []
},
"execution_count": 160
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "4Rc-5dxWT7S5",
"colab_type": "code",
"outputId": "aedc3643-de21-4563-bf4f-fda935fcca8b",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"test.sentiment.values[0]"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"1"
]
},
"metadata": {
"tags": []
},
"execution_count": 161
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "1kF5TpmrUP_a",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment