Created
February 18, 2017 10:39
-
-
Save ivaninkv/6d5fcc82121d4c499fc19c28d706f7fd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import numpy as np" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "train_x = pd.read_csv(\"files/x_train.csv\", sep=\";\")\n", | |
| "test_x = pd.read_csv(\"files/x_test.csv\", sep=\";\")\n", | |
| "train_y = pd.read_csv(\"files/y_train.csv\", header=None)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 58, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "\n", | |
| "import logging\n", | |
| "import numpy as np\n", | |
| "\n", | |
| "from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\n", | |
| "from sklearn.linear_model import LogisticRegressionCV\n", | |
| "from sklearn.metrics import mean_absolute_error\n", | |
| "from sklearn.neighbors import KNeighborsRegressor\n", | |
| "from xgboost import XGBClassifier\n", | |
| "\n", | |
| "from heamy.dataset import Dataset\n", | |
| "from heamy.estimator import Regressor, Classifier\n", | |
| "from heamy.pipeline import ModelsPipeline" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 59, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "dataset = Dataset(train_x.values, train_y.values.ravel(), test_x.values)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 64, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "xgbparams1 = {\"learning_rate\": 0.01, \"max_depth\": 6, \"n_estimators\": 500, \"subsample\": 0.8}\n", | |
| "xgbparams2 = {\"learning_rate\": 0.005, \"max_depth\": 8, \"n_estimators\": 1500, \"subsample\": 0.9}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 65, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "etparams = {\"n_estimators\": 50, \"n_jobs\": -1}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 66, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "rfparams = {\"n_estimators\": 100, \"n_jobs\": -1}" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 67, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "xgbclf1 = Classifier(dataset=dataset, estimator=XGBClassifier, parameters=xgbparams1)\n", | |
| "xgbclf2 = Classifier(dataset=dataset, estimator=XGBClassifier, parameters=xgbparams2)\n", | |
| "etclf = Classifier(dataset=dataset, estimator=ExtraTreesClassifier, parameters=etparams)\n", | |
| "rfclf = Classifier(dataset=dataset, estimator=RandomForestClassifier, parameters=rfparams)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 68, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "pipeline = ModelsPipeline(etclf, rfclf, xgbclf1, xgbclf2)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 69, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", | |
| " estimator.fit(X_train, y_train)\n", | |
| "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", | |
| " estimator.fit(X_train, y_train)\n", | |
| "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", | |
| " estimator.fit(X_train, y_train)\n", | |
| "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", | |
| " estimator.fit(X_train, y_train)\n", | |
| "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", | |
| " estimator.fit(X_train, y_train)\n", | |
| "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", | |
| " estimator.fit(X_train, y_train)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "stack_ds = pipeline.stack(k=5, seed=111)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 71, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Best Score (mean_absolute_error): 0.236843582978\n", | |
| "Best Weights: [ 8.27145227e-20 3.97330676e-06 9.99996027e-01]\n", | |
| "---\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", | |
| " estimator.fit(X_train, y_train)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# 2nd level\n", | |
| "stack_rf = Classifier(dataset=stack_ds, estimator=RandomForestClassifier, parameters={'n_estimators': 100},name='rf')\n", | |
| "stack_lr = Classifier(dataset=stack_ds, estimator=LogisticRegressionCV, name='lr')\n", | |
| "stack_et = Classifier(dataset=stack_ds, estimator=ExtraTreesClassifier, name='et')\n", | |
| "stack_pipeline = ModelsPipeline(stack_rf,stack_lr, stack_et)\n", | |
| "\n", | |
| "# 3rd level\n", | |
| "weights = stack_pipeline.find_weights(mean_absolute_error)\n", | |
| "print('---')\n", | |
| "result = stack_pipeline.weight(weights)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 72, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", | |
| " estimator.fit(X_train, y_train)\n", | |
| "/home/akis/.local/lib/python3.5/site-packages/sklearn/utils/validation.py:526: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", | |
| " y = column_or_1d(y, warn=True)\n", | |
| "/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", | |
| " estimator.fit(X_train, y_train)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "subm = np.round(result.execute()).astype(int)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 73, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "test_df = pd.DataFrame()\n", | |
| "test_df[\"pred\"] = subm\n", | |
| "test_df.to_csv(\"sumb.csv\", index=None, header=None)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.2" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment