ivaninkv/Ml.ipynb

## Ml.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Feature tables are ';'-separated; the target file is read without a header row.\n",
    "train_x, test_x = (\n",
    "    pd.read_csv(path, sep=\";\")\n",
    "    for path in (\"files/x_train.csv\", \"files/x_test.csv\")\n",
    ")\n",
    "train_y = pd.read_csv(\"files/y_train.csv\", header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "\n",
    "import logging\n",
    "import numpy as np\n",
    "\n",
    "from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\n",
    "from sklearn.linear_model import LogisticRegressionCV\n",
    "from sklearn.metrics import mean_absolute_error\n",
    "from sklearn.neighbors import KNeighborsRegressor\n",
    "from xgboost import XGBClassifier\n",
    "\n",
    "from heamy.dataset import Dataset\n",
    "from heamy.estimator import Regressor, Classifier\n",
    "from heamy.pipeline import ModelsPipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Wrap the train/test arrays in a heamy Dataset.\n",
    "# `.values` replaces the deprecated `DataFrame.as_matrix()` (same array, works on all pandas versions).\n",
    "# `ravel()` flattens the single-column target to shape (n_samples,); passing the column vector\n",
    "# made scikit-learn emit a DataConversionWarning on every fit.\n",
    "dataset = Dataset(train_x.values, train_y.values.ravel(), test_x.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Two XGBoost configurations: the second uses a smaller learning rate, deeper trees,\n",
    "# more estimators and a larger subsample than the first.\n",
    "xgbparams1 = dict(learning_rate=0.01, max_depth=6, n_estimators=500, subsample=0.8)\n",
    "xgbparams2 = dict(learning_rate=0.005, max_depth=8, n_estimators=1500, subsample=0.9)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Keyword arguments for the first-level ExtraTreesClassifier.\n",
    "etparams = dict(n_estimators=50, n_jobs=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Keyword arguments for the first-level model stored in `rfclf`.\n",
    "rfparams = dict(n_estimators=100, n_jobs=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# First-level models; every one is built on the same `dataset`.\n",
    "xgbclf1 = Classifier(dataset=dataset, estimator=XGBClassifier, parameters=xgbparams1)\n",
    "xgbclf2 = Classifier(dataset=dataset, estimator=XGBClassifier, parameters=xgbparams2)\n",
    "etclf = Classifier(dataset=dataset, estimator=ExtraTreesClassifier, parameters=etparams)\n",
    "# Use RandomForestClassifier here: the original passed ExtraTreesClassifier, so the\n",
    "# \"random forest\" slot was just a second extra-trees model with different parameters.\n",
    "rfclf = Classifier(dataset=dataset, estimator=RandomForestClassifier, parameters=rfparams)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Combine all first-level models into one pipeline.\n",
    "# The original referenced `xgbclf`, which is never defined in this notebook (NameError on a\n",
    "# fresh kernel); the two XGBoost models that are defined are `xgbclf1` and `xgbclf2`.\n",
    "pipeline = ModelsPipeline(etclf, rfclf, xgbclf1, xgbclf2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  estimator.fit(X_train, y_train)\n",
      "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  estimator.fit(X_train, y_train)\n",
      "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  estimator.fit(X_train, y_train)\n",
      "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  estimator.fit(X_train, y_train)\n",
      "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  estimator.fit(X_train, y_train)\n",
      "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  estimator.fit(X_train, y_train)\n"
     ]
    }
   ],
   "source": [
    "# Stack the first-level models with 5 folds and a fixed seed; the result is used\n",
    "# as the dataset for the second-level models.\n",
    "stack_ds = pipeline.stack(\n",
    "    k=5,\n",
    "    seed=111,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Best Score (mean_absolute_error): 0.236843582978\n",
      "Best Weights: [  8.27145227e-20   3.97330676e-06   9.99996027e-01]\n",
      "---\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  estimator.fit(X_train, y_train)\n"
     ]
    }
   ],
   "source": [
    "# Second level: three classifiers built on the stacked dataset.\n",
    "stack_rf = Classifier(\n",
    "    dataset=stack_ds,\n",
    "    estimator=RandomForestClassifier,\n",
    "    parameters={'n_estimators': 100},\n",
    "    name='rf',\n",
    ")\n",
    "stack_lr = Classifier(\n",
    "    dataset=stack_ds,\n",
    "    estimator=LogisticRegressionCV,\n",
    "    name='lr',\n",
    ")\n",
    "stack_et = Classifier(\n",
    "    dataset=stack_ds,\n",
    "    estimator=ExtraTreesClassifier,\n",
    "    name='et',\n",
    ")\n",
    "stack_pipeline = ModelsPipeline(stack_rf, stack_lr, stack_et)\n",
    "\n",
    "# Third level: search for blending weights scored with mean_absolute_error,\n",
    "# then combine the second-level models using those weights.\n",
    "weights = stack_pipeline.find_weights(mean_absolute_error)\n",
    "print('---')\n",
    "result = stack_pipeline.weight(weights)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  estimator.fit(X_train, y_train)\n",
      "/home/akis/.local/lib/python3.5/site-packages/sklearn/utils/validation.py:526: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  estimator.fit(X_train, y_train)\n"
     ]
    }
   ],
   "source": [
    "# Run the weighted blend, round its output and cast to integers.\n",
    "subm = np.round(\n",
    "    result.execute()\n",
    ").astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Write the predictions as a single column, without index or header row.\n",
    "test_df = pd.DataFrame()\n",
    "test_df[\"pred\"] = subm\n",
    "test_df.to_csv(\n",
    "    \"sumb.csv\",\n",
    "    index=None,\n",
    "    header=None,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import numpy as np"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Feature tables are ';'-separated; the target file is read without a header row.\n",
	"train_x, test_x = (\n",
	"    pd.read_csv(path, sep=\";\")\n",
	"    for path in (\"files/x_train.csv\", \"files/x_test.csv\")\n",
	")\n",
	"train_y = pd.read_csv(\"files/y_train.csv\", header=None)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 58,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"\n",
	"import logging\n",
	"import numpy as np\n",
	"\n",
	"from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\n",
	"from sklearn.linear_model import LogisticRegressionCV\n",
	"from sklearn.metrics import mean_absolute_error\n",
	"from sklearn.neighbors import KNeighborsRegressor\n",
	"from xgboost import XGBClassifier\n",
	"\n",
	"from heamy.dataset import Dataset\n",
	"from heamy.estimator import Regressor, Classifier\n",
	"from heamy.pipeline import ModelsPipeline"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 59,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Wrap the train/test arrays in a heamy Dataset.\n",
	"# `.values` replaces the deprecated `DataFrame.as_matrix()` (same array, works on all pandas versions).\n",
	"# `ravel()` flattens the single-column target to shape (n_samples,); passing the column vector\n",
	"# made scikit-learn emit a DataConversionWarning on every fit.\n",
	"dataset = Dataset(train_x.values, train_y.values.ravel(), test_x.values)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 64,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Two XGBoost configurations: the second uses a smaller learning rate, deeper trees,\n",
	"# more estimators and a larger subsample than the first.\n",
	"xgbparams1 = dict(learning_rate=0.01, max_depth=6, n_estimators=500, subsample=0.8)\n",
	"xgbparams2 = dict(learning_rate=0.005, max_depth=8, n_estimators=1500, subsample=0.9)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 65,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Keyword arguments for the first-level ExtraTreesClassifier.\n",
	"etparams = dict(n_estimators=50, n_jobs=-1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 66,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Keyword arguments for the first-level model stored in `rfclf`.\n",
	"rfparams = dict(n_estimators=100, n_jobs=-1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 67,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# First-level models; every one is built on the same `dataset`.\n",
	"xgbclf1 = Classifier(dataset=dataset, estimator=XGBClassifier, parameters=xgbparams1)\n",
	"xgbclf2 = Classifier(dataset=dataset, estimator=XGBClassifier, parameters=xgbparams2)\n",
	"etclf = Classifier(dataset=dataset, estimator=ExtraTreesClassifier, parameters=etparams)\n",
	"# Use RandomForestClassifier here: the original passed ExtraTreesClassifier, so the\n",
	"# \"random forest\" slot was just a second extra-trees model with different parameters.\n",
	"rfclf = Classifier(dataset=dataset, estimator=RandomForestClassifier, parameters=rfparams)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 68,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Combine all first-level models into one pipeline.\n",
	"# The original referenced `xgbclf`, which is never defined in this notebook (NameError on a\n",
	"# fresh kernel); the two XGBoost models that are defined are `xgbclf1` and `xgbclf2`.\n",
	"pipeline = ModelsPipeline(etclf, rfclf, xgbclf1, xgbclf2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 69,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
	" estimator.fit(X_train, y_train)\n",
	"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
	" estimator.fit(X_train, y_train)\n",
	"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
	" estimator.fit(X_train, y_train)\n",
	"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
	" estimator.fit(X_train, y_train)\n",
	"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
	" estimator.fit(X_train, y_train)\n",
	"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
	" estimator.fit(X_train, y_train)\n"
	]
	}
	],
	"source": [
	"# Stack the first-level models with 5 folds and a fixed seed; the result is used\n",
	"# as the dataset for the second-level models.\n",
	"stack_ds = pipeline.stack(\n",
	"    k=5,\n",
	"    seed=111,\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 71,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Best Score (mean_absolute_error): 0.236843582978\n",
	"Best Weights: [ 8.27145227e-20 3.97330676e-06 9.99996027e-01]\n",
	"---\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
	" estimator.fit(X_train, y_train)\n"
	]
	}
	],
	"source": [
	"# Second level: three classifiers built on the stacked dataset.\n",
	"stack_rf = Classifier(\n",
	"    dataset=stack_ds,\n",
	"    estimator=RandomForestClassifier,\n",
	"    parameters={'n_estimators': 100},\n",
	"    name='rf',\n",
	")\n",
	"stack_lr = Classifier(\n",
	"    dataset=stack_ds,\n",
	"    estimator=LogisticRegressionCV,\n",
	"    name='lr',\n",
	")\n",
	"stack_et = Classifier(\n",
	"    dataset=stack_ds,\n",
	"    estimator=ExtraTreesClassifier,\n",
	"    name='et',\n",
	")\n",
	"stack_pipeline = ModelsPipeline(stack_rf, stack_lr, stack_et)\n",
	"\n",
	"# Third level: search for blending weights scored with mean_absolute_error,\n",
	"# then combine the second-level models using those weights.\n",
	"weights = stack_pipeline.find_weights(mean_absolute_error)\n",
	"print('---')\n",
	"result = stack_pipeline.weight(weights)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 72,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
	" estimator.fit(X_train, y_train)\n",
	"/home/akis/.local/lib/python3.5/site-packages/sklearn/utils/validation.py:526: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
	" y = column_or_1d(y, warn=True)\n",
	"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
	" estimator.fit(X_train, y_train)\n"
	]
	}
	],
	"source": [
	"# Run the weighted blend, round its output and cast to integers.\n",
	"subm = np.round(\n",
	"    result.execute()\n",
	").astype(int)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 73,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# Write the predictions as a single column, without index or header row.\n",
	"test_df = pd.DataFrame()\n",
	"test_df[\"pred\"] = subm\n",
	"test_df.to_csv(\n",
	"    \"sumb.csv\",\n",
	"    index=None,\n",
	"    header=None,\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}
No results found