Skip to content

Instantly share code, notes, and snippets.

@ivaninkv
Created February 18, 2017 10:39
Show Gist options
  • Select an option

  • Save ivaninkv/6d5fcc82121d4c499fc19c28d706f7fd to your computer and use it in GitHub Desktop.

Select an option

Save ivaninkv/6d5fcc82121d4c499fc19c28d706f7fd to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"train_x = pd.read_csv(\"files/x_train.csv\", sep=\";\")\n",
"test_x = pd.read_csv(\"files/x_test.csv\", sep=\";\")\n",
"train_y = pd.read_csv(\"files/y_train.csv\", header=None)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"\n",
"import logging\n",
"import numpy as np\n",
"\n",
"from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier\n",
"from sklearn.linear_model import LogisticRegressionCV\n",
"from sklearn.metrics import mean_absolute_error\n",
"from sklearn.neighbors import KNeighborsRegressor\n",
"from xgboost import XGBClassifier\n",
"\n",
"from heamy.dataset import Dataset\n",
"from heamy.estimator import Regressor, Classifier\n",
"from heamy.pipeline import ModelsPipeline"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"dataset = Dataset(train_x.as_matrix(), train_y.values, test_x.as_matrix())"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"xgbparams1 = {\"learning_rate\": 0.01, \"max_depth\": 6, \"n_estimators\": 500, \"subsample\": 0.8}\n",
"xgbparams2 = {\"learning_rate\": 0.005, \"max_depth\": 8, \"n_estimators\": 1500, \"subsample\": 0.9}"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"etparams = {\"n_estimators\": 50, \"n_jobs\": -1}"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"rfparams = {\"n_estimators\": 100, \"n_jobs\": -1}"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"xgbclf1 = Classifier(dataset=dataset, estimator=XGBClassifier, parameters=xgbparams1)\n",
"xgbclf2 = Classifier(dataset=dataset, estimator=XGBClassifier, parameters=xgbparams2)\n",
"etclf = Classifier(dataset=dataset, estimator=ExtraTreesClassifier, parameters=etparams)\n",
"rfclf = Classifier(dataset=dataset, estimator=ExtraTreesClassifier, parameters=rfparams)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"pipeline = ModelsPipeline(etclf, rfclf, xgbclf)"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
" estimator.fit(X_train, y_train)\n",
"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
" estimator.fit(X_train, y_train)\n",
"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
" estimator.fit(X_train, y_train)\n",
"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
" estimator.fit(X_train, y_train)\n",
"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
" estimator.fit(X_train, y_train)\n",
"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
" estimator.fit(X_train, y_train)\n"
]
}
],
"source": [
"stack_ds = pipeline.stack(k=5, seed=111)"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best Score (mean_absolute_error): 0.236843582978\n",
"Best Weights: [ 8.27145227e-20 3.97330676e-06 9.99996027e-01]\n",
"---\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
" estimator.fit(X_train, y_train)\n"
]
}
],
"source": [
"# 2nd level\n",
"stack_rf = Classifier(dataset=stack_ds, estimator=RandomForestClassifier, parameters={'n_estimators': 100},name='rf')\n",
"stack_lr = Classifier(dataset=stack_ds, estimator=LogisticRegressionCV, name='lr')\n",
"stack_et = Classifier(dataset=stack_ds, estimator=ExtraTreesClassifier, name='et')\n",
"stack_pipeline = ModelsPipeline(stack_rf,stack_lr, stack_et)\n",
"\n",
"# 3rd level\n",
"weights = stack_pipeline.find_weights(mean_absolute_error)\n",
"print('---')\n",
"result = stack_pipeline.weight(weights)"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
" estimator.fit(X_train, y_train)\n",
"/home/akis/.local/lib/python3.5/site-packages/sklearn/utils/validation.py:526: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n",
"/usr/local/lib/python3.5/dist-packages/heamy/estimator.py:108: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
" estimator.fit(X_train, y_train)\n"
]
}
],
"source": [
"subm = np.round(result.execute()).astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"test_df = pd.DataFrame()\n",
"test_df[\"pred\"] = subm\n",
"test_df.to_csv(\"sumb.csv\", index=None, header=None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment