Last active
August 3, 2017 14:39
-
-
Save kmike/f978d29a250070a7236c0485b1b1f182 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 23, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from sklearn.linear_model import LinearRegression, LogisticRegression\n", | |
| "from sklearn.svm import LinearSVC, LinearSVR\n", | |
| "from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n", | |
| "from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor\n", | |
| "from sklearn.datasets import load_boston, load_iris, load_diabetes, make_classification, make_regression\n", | |
| "from sklearn.preprocessing import StandardScaler\n", | |
| "from sklearn.pipeline import make_pipeline, Pipeline\n", | |
| "from sklearn.metrics import r2_score\n", | |
| "import numpy as np\n", | |
| "import pandas as pd\n", | |
| "from scipy.stats import spearmanr, pearsonr\n", | |
| "\n", | |
| "import eli5\n", | |
| "from eli5.sklearn import PermutationImportance" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def dcg_score(y_true, y_score, k=10):\n", | |
| " order = np.argsort(y_score)[::-1]\n", | |
| " y_true = np.take(y_true, order[:k])\n", | |
| " gains = 2 ** y_true - 1\n", | |
| " # highest rank is 1 so +2 instead of +1\n", | |
| " discounts = np.log2(np.arange(len(y_true)) + 2)\n", | |
| " return np.sum(gains / discounts)\n", | |
| "\n", | |
| "def ndcg_score(y_true, y_score, k=10):\n", | |
| " best = dcg_score(y_true, y_true, k)\n", | |
| " actual = dcg_score(y_true, y_score, k)\n", | |
| " return actual / best" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 46, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def get_classification_datasets():\n", | |
| " res = []\n", | |
| "\n", | |
| " data = load_iris()\n", | |
| " res.append(('iris_binary', data.data, data.target != 0, data.feature_names))\n", | |
| " \n", | |
| " X, y = make_classification(n_informative=5, n_redundant=0)\n", | |
| " res.append(('CLF(n_informative=5, n_redundant=0)', X, y, None))\n", | |
| " \n", | |
| " X, y = make_classification(n_informative=5, n_redundant=4)\n", | |
| " res.append(('CLF(n_informative=5, n_redundant=4)', X, y, None))\n", | |
| "\n", | |
| " X, y = make_classification(n_informative=1, n_redundant=4, n_clusters_per_class=1)\n", | |
| " res.append(('CLF(n_informative=1, n_redundant=4)', X, y, None))\n", | |
| "\n", | |
| " X, y = make_classification(n_informative=20, n_redundant=0)\n", | |
| " res.append(('CLF(n_informative=20, n_redundant=0)', X, y, None))\n", | |
| "\n", | |
| " return res\n", | |
| "\n", | |
| "\n", | |
| "def get_regression_datasets():\n", | |
| " res = []\n", | |
| " \n", | |
| " data = load_boston()\n", | |
| " res.append(('boston', data.data, data.target, data.feature_names))\n", | |
| " \n", | |
| " data = load_diabetes()\n", | |
| " res.append(('diabetese', data.data, data.target, None))\n", | |
| " \n", | |
| " X, y = make_regression(n_informative=5)\n", | |
| " res.append(('REG(n_informative=5)', X, y, None))\n", | |
| " \n", | |
| " X, y = make_regression(n_informative=5, effective_rank=2)\n", | |
| " res.append(('REG(n_informative=5, effective_rank=2)', X, y, None))\n", | |
| "\n", | |
| " X, y = make_regression(n_informative=1)\n", | |
| " res.append(('REG(n_informative=1)', X, y, None))\n", | |
| "\n", | |
| " X, y = make_regression(n_informative=20)\n", | |
| " res.append(('REG(n_informative=20)', X, y, None))\n", | |
| "\n", | |
| " return res\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 47, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def get_classifiers():\n", | |
| " return [\n", | |
| " LogisticRegression(),\n", | |
| " LinearSVC(),\n", | |
| " RandomForestClassifier(),\n", | |
| " DecisionTreeClassifier(),\n", | |
| " ]\n", | |
| "\n", | |
| "\n", | |
| "def get_regressors():\n", | |
| " return [\n", | |
| " make_pipeline(StandardScaler(), LinearRegression()),\n", | |
| " make_pipeline(StandardScaler(), LinearSVR()),\n", | |
| " RandomForestRegressor(),\n", | |
| " DecisionTreeRegressor(),\n", | |
| " ]\n", | |
| " " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 48, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def get_explanations(est, X, y, feature_names):\n", | |
| " df_inspect = eli5.explain_weights_df(est, feature_names=feature_names, top=100)\n", | |
| " if isinstance(df_inspect.index, pd.MultiIndex):\n", | |
| " df_inspect.index = df_inspect.index.droplevel()\n", | |
| " df_inspect.index.name = None\n", | |
| " \n", | |
| " pi = PermutationImportance(est, cv='prefit', n_iter=10).fit(X, y)\n", | |
| " df_pi = eli5.explain_weights_df(pi, feature_names=feature_names, top=100)\n", | |
| " \n", | |
| " pi_cv = PermutationImportance(est, cv=5, n_iter=10).fit(X, y)\n", | |
| " df_picv = eli5.explain_weights_df(pi_cv, feature_names=feature_names, top=100)\n", | |
| " \n", | |
| " df = pd.concat([df_inspect.weight, df_pi.weight, df_picv.weight], axis=1)\n", | |
| " df.columns=['w_inspect', 'w_pi', 'w_picv']\n", | |
| " df = df.dropna() / df.abs().sum()\n", | |
| " return df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 65, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def get_scores(df):\n", | |
| " w_inspect_abs = df.w_inspect.abs().values\n", | |
| " def _scores(column):\n", | |
| " return {\n", | |
| " 'SpearmanR': spearmanr(w_inspect_abs, column.values).correlation,\n", | |
| " 'NDCG': ndcg_score(w_inspect_abs, column.values, 100000),\n", | |
| " 'NDCG@5': ndcg_score(w_inspect_abs, column.values, 5),\n", | |
| " 'Pearson': pearsonr(w_inspect_abs, column.values)[0],\n", | |
| "# 'R^2': r2_score(w_inspect_abs, column.values),\n", | |
| " 'L2': np.linalg.norm(w_inspect_abs - column.values),\n", | |
| " }\n", | |
| " return {\n", | |
| " 'PI': _scores(df.w_pi),\n", | |
| " 'PICV': _scores(df.w_picv),\n", | |
| " }" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 66, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "done: LogisticRegression iris_binary\n", | |
| "done: LinearSVC iris_binary\n", | |
| "done: RandomForestClassifier iris_binary\n", | |
| "done: DecisionTreeClassifier iris_binary\n", | |
| "done: LogisticRegression CLF(n_informative=5, n_redundant=0)\n", | |
| "done: LinearSVC CLF(n_informative=5, n_redundant=0)\n", | |
| "done: RandomForestClassifier CLF(n_informative=5, n_redundant=0)\n", | |
| "done: DecisionTreeClassifier CLF(n_informative=5, n_redundant=0)\n", | |
| "done: LogisticRegression CLF(n_informative=5, n_redundant=4)\n", | |
| "done: LinearSVC CLF(n_informative=5, n_redundant=4)\n", | |
| "done: RandomForestClassifier CLF(n_informative=5, n_redundant=4)\n", | |
| "done: DecisionTreeClassifier CLF(n_informative=5, n_redundant=4)\n", | |
| "done: LogisticRegression CLF(n_informative=1, n_redundant=4)\n", | |
| "done: LinearSVC CLF(n_informative=1, n_redundant=4)\n", | |
| "done: RandomForestClassifier CLF(n_informative=1, n_redundant=4)\n", | |
| "done: DecisionTreeClassifier CLF(n_informative=1, n_redundant=4)\n", | |
| "done: LogisticRegression CLF(n_informative=20, n_redundant=0)\n", | |
| "done: LinearSVC CLF(n_informative=20, n_redundant=0)\n", | |
| "done: RandomForestClassifier CLF(n_informative=20, n_redundant=0)\n", | |
| "done: DecisionTreeClassifier CLF(n_informative=20, n_redundant=0)\n", | |
| "done: LinearRegression boston\n", | |
| "done: LinearSVR boston\n", | |
| "done: RandomForestRegressor boston\n", | |
| "done: DecisionTreeRegressor boston\n", | |
| "done: LinearRegression diabetese\n", | |
| "done: LinearSVR diabetese\n", | |
| "done: RandomForestRegressor diabetese\n", | |
| "done: DecisionTreeRegressor diabetese\n", | |
| "done: LinearRegression REG(n_informative=5)\n", | |
| "done: LinearSVR REG(n_informative=5)\n", | |
| "done: RandomForestRegressor REG(n_informative=5)\n", | |
| "done: DecisionTreeRegressor REG(n_informative=5)\n", | |
| "done: LinearRegression REG(n_informative=5, effective_rank=2)\n", | |
| "done: LinearSVR REG(n_informative=5, effective_rank=2)\n", | |
| "done: RandomForestRegressor REG(n_informative=5, effective_rank=2)\n", | |
| "done: DecisionTreeRegressor REG(n_informative=5, effective_rank=2)\n", | |
| "done: LinearRegression REG(n_informative=1)\n", | |
| "done: LinearSVR REG(n_informative=1)\n", | |
| "done: RandomForestRegressor REG(n_informative=1)\n", | |
| "done: DecisionTreeRegressor REG(n_informative=1)\n", | |
| "done: LinearRegression REG(n_informative=20)\n", | |
| "done: LinearSVR REG(n_informative=20)\n", | |
| "done: RandomForestRegressor REG(n_informative=20)\n", | |
| "done: DecisionTreeRegressor REG(n_informative=20)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "def get_name(est):\n", | |
| " if isinstance(est, Pipeline):\n", | |
| " est = est.steps[-1][1]\n", | |
| " return est.__class__.__name__\n", | |
| "\n", | |
| "dfs = []\n", | |
| "estimators = {}\n", | |
| "scores = []\n", | |
| "\n", | |
| "def _append(X, y, feature_names, dataset_name, est):\n", | |
| " est.fit(X, y)\n", | |
| " df = get_explanations(est, X, y, feature_names)\n", | |
| " name = get_name(est)\n", | |
| " estimators[name, dataset_name] = est\n", | |
| " dfs.append((name, dataset_name, df))\n", | |
| " for k, v in get_scores(df).items():\n", | |
| " scores.append((name, dataset_name, k, v))\n", | |
| " print(\"done: {} {}\".format(name, dataset_name))\n", | |
| " \n", | |
| "\n", | |
| "for (dataset_name, X, y, feature_names) in get_classification_datasets():\n", | |
| " for clf in get_classifiers():\n", | |
| " _append(X, y, feature_names, dataset_name, clf)\n", | |
| " \n", | |
| "for (dataset_name, X, y, feature_names) in get_regression_datasets():\n", | |
| " for reg in get_regressors():\n", | |
| " _append(X, y, feature_names, dataset_name, reg)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 67, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>L2</th>\n", | |
| " <th>NDCG</th>\n", | |
| " <th>NDCG@5</th>\n", | |
| " <th>Pearson</th>\n", | |
| " <th>SpearmanR</th>\n", | |
| " <th>dataset</th>\n", | |
| " <th>estimator</th>\n", | |
| " <th>type</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0.672614</td>\n", | |
| " <td>0.984817</td>\n", | |
| " <td>0.984817</td>\n", | |
| " <td>0.833270</td>\n", | |
| " <td>0.632456</td>\n", | |
| " <td>iris_binary</td>\n", | |
| " <td>LogisticRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0.672541</td>\n", | |
| " <td>0.984817</td>\n", | |
| " <td>0.984817</td>\n", | |
| " <td>0.833273</td>\n", | |
| " <td>0.632456</td>\n", | |
| " <td>iris_binary</td>\n", | |
| " <td>LogisticRegression</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0.675399</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.872404</td>\n", | |
| " <td>0.948683</td>\n", | |
| " <td>iris_binary</td>\n", | |
| " <td>LinearSVC</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0.671430</td>\n", | |
| " <td>0.999950</td>\n", | |
| " <td>0.999950</td>\n", | |
| " <td>0.873153</td>\n", | |
| " <td>0.632456</td>\n", | |
| " <td>iris_binary</td>\n", | |
| " <td>LinearSVC</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0.282843</td>\n", | |
| " <td>0.964335</td>\n", | |
| " <td>0.964335</td>\n", | |
| " <td>0.968496</td>\n", | |
| " <td>0.816497</td>\n", | |
| " <td>iris_binary</td>\n", | |
| " <td>RandomForestClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>0.109003</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.993515</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>iris_binary</td>\n", | |
| " <td>RandomForestClassifier</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>iris_binary</td>\n", | |
| " <td>DecisionTreeClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>0.834689</td>\n", | |
| " <td>0.630930</td>\n", | |
| " <td>0.630930</td>\n", | |
| " <td>0.357553</td>\n", | |
| " <td>0.272166</td>\n", | |
| " <td>iris_binary</td>\n", | |
| " <td>DecisionTreeClassifier</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>0.167495</td>\n", | |
| " <td>0.971622</td>\n", | |
| " <td>0.965187</td>\n", | |
| " <td>0.872985</td>\n", | |
| " <td>0.908544</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=0)</td>\n", | |
| " <td>LogisticRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>0.255409</td>\n", | |
| " <td>0.973934</td>\n", | |
| " <td>0.981802</td>\n", | |
| " <td>0.844037</td>\n", | |
| " <td>0.820918</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=0)</td>\n", | |
| " <td>LogisticRegression</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>0.123809</td>\n", | |
| " <td>0.969013</td>\n", | |
| " <td>0.932469</td>\n", | |
| " <td>0.893776</td>\n", | |
| " <td>0.934538</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=0)</td>\n", | |
| " <td>LinearSVC</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>0.252516</td>\n", | |
| " <td>0.914352</td>\n", | |
| " <td>0.784295</td>\n", | |
| " <td>0.622974</td>\n", | |
| " <td>0.684211</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=0)</td>\n", | |
| " <td>LinearSVC</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>0.322749</td>\n", | |
| " <td>0.994469</td>\n", | |
| " <td>0.989704</td>\n", | |
| " <td>0.900801</td>\n", | |
| " <td>0.885049</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=0)</td>\n", | |
| " <td>RandomForestClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13</th>\n", | |
| " <td>0.277373</td>\n", | |
| " <td>0.946303</td>\n", | |
| " <td>0.900302</td>\n", | |
| " <td>0.769257</td>\n", | |
| " <td>0.374577</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=0)</td>\n", | |
| " <td>RandomForestClassifier</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>14</th>\n", | |
| " <td>0.072285</td>\n", | |
| " <td>0.993702</td>\n", | |
| " <td>0.970213</td>\n", | |
| " <td>0.981011</td>\n", | |
| " <td>0.973103</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=0)</td>\n", | |
| " <td>DecisionTreeClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>15</th>\n", | |
| " <td>0.260483</td>\n", | |
| " <td>0.920732</td>\n", | |
| " <td>0.879966</td>\n", | |
| " <td>0.741018</td>\n", | |
| " <td>0.298858</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=0)</td>\n", | |
| " <td>DecisionTreeClassifier</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>16</th>\n", | |
| " <td>0.273521</td>\n", | |
| " <td>0.981474</td>\n", | |
| " <td>0.973298</td>\n", | |
| " <td>0.812503</td>\n", | |
| " <td>0.817674</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=4)</td>\n", | |
| " <td>LogisticRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>17</th>\n", | |
| " <td>0.264550</td>\n", | |
| " <td>0.946682</td>\n", | |
| " <td>0.940374</td>\n", | |
| " <td>0.756563</td>\n", | |
| " <td>0.520301</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=4)</td>\n", | |
| " <td>LogisticRegression</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>18</th>\n", | |
| " <td>0.250067</td>\n", | |
| " <td>0.966972</td>\n", | |
| " <td>0.913251</td>\n", | |
| " <td>0.768077</td>\n", | |
| " <td>0.706813</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=4)</td>\n", | |
| " <td>LinearSVC</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>19</th>\n", | |
| " <td>0.304935</td>\n", | |
| " <td>0.957164</td>\n", | |
| " <td>0.938046</td>\n", | |
| " <td>0.744530</td>\n", | |
| " <td>0.711278</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=4)</td>\n", | |
| " <td>LinearSVC</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>20</th>\n", | |
| " <td>0.275022</td>\n", | |
| " <td>0.989233</td>\n", | |
| " <td>0.981340</td>\n", | |
| " <td>0.971646</td>\n", | |
| " <td>0.749610</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=4)</td>\n", | |
| " <td>RandomForestClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>21</th>\n", | |
| " <td>0.263122</td>\n", | |
| " <td>0.955524</td>\n", | |
| " <td>0.922541</td>\n", | |
| " <td>0.930586</td>\n", | |
| " <td>0.383459</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=4)</td>\n", | |
| " <td>RandomForestClassifier</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>22</th>\n", | |
| " <td>0.138652</td>\n", | |
| " <td>0.998331</td>\n", | |
| " <td>0.998283</td>\n", | |
| " <td>0.976383</td>\n", | |
| " <td>0.997411</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=4)</td>\n", | |
| " <td>DecisionTreeClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>23</th>\n", | |
| " <td>0.247536</td>\n", | |
| " <td>0.944404</td>\n", | |
| " <td>0.877370</td>\n", | |
| " <td>0.928444</td>\n", | |
| " <td>0.173572</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=4)</td>\n", | |
| " <td>DecisionTreeClassifier</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>24</th>\n", | |
| " <td>0.336659</td>\n", | |
| " <td>0.990104</td>\n", | |
| " <td>0.978667</td>\n", | |
| " <td>0.918085</td>\n", | |
| " <td>0.875421</td>\n", | |
| " <td>CLF(n_informative=1, n_redundant=4)</td>\n", | |
| " <td>LogisticRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>25</th>\n", | |
| " <td>0.401469</td>\n", | |
| " <td>0.963876</td>\n", | |
| " <td>0.954015</td>\n", | |
| " <td>0.893001</td>\n", | |
| " <td>0.221249</td>\n", | |
| " <td>CLF(n_informative=1, n_redundant=4)</td>\n", | |
| " <td>LogisticRegression</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>26</th>\n", | |
| " <td>0.332961</td>\n", | |
| " <td>0.992544</td>\n", | |
| " <td>0.973712</td>\n", | |
| " <td>0.937979</td>\n", | |
| " <td>0.845940</td>\n", | |
| " <td>CLF(n_informative=1, n_redundant=4)</td>\n", | |
| " <td>LinearSVC</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>27</th>\n", | |
| " <td>0.341660</td>\n", | |
| " <td>0.971785</td>\n", | |
| " <td>0.975554</td>\n", | |
| " <td>0.895904</td>\n", | |
| " <td>0.352145</td>\n", | |
| " <td>CLF(n_informative=1, n_redundant=4)</td>\n", | |
| " <td>LinearSVC</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>28</th>\n", | |
| " <td>0.578752</td>\n", | |
| " <td>0.492292</td>\n", | |
| " <td>0.122312</td>\n", | |
| " <td>-0.288927</td>\n", | |
| " <td>0.191127</td>\n", | |
| " <td>CLF(n_informative=1, n_redundant=4)</td>\n", | |
| " <td>RandomForestClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>29</th>\n", | |
| " <td>0.460242</td>\n", | |
| " <td>0.926089</td>\n", | |
| " <td>0.853204</td>\n", | |
| " <td>0.717342</td>\n", | |
| " <td>0.405126</td>\n", | |
| " <td>CLF(n_informative=1, n_redundant=4)</td>\n", | |
| " <td>RandomForestClassifier</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>...</th>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>58</th>\n", | |
| " <td>0.235361</td>\n", | |
| " <td>0.991899</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.927546</td>\n", | |
| " <td>0.737489</td>\n", | |
| " <td>REG(n_informative=5)</td>\n", | |
| " <td>LinearSVR</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>59</th>\n", | |
| " <td>0.189558</td>\n", | |
| " <td>0.980900</td>\n", | |
| " <td>0.999842</td>\n", | |
| " <td>0.927359</td>\n", | |
| " <td>0.506976</td>\n", | |
| " <td>REG(n_informative=5)</td>\n", | |
| " <td>LinearSVR</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>60</th>\n", | |
| " <td>0.142030</td>\n", | |
| " <td>0.990276</td>\n", | |
| " <td>0.984540</td>\n", | |
| " <td>0.980935</td>\n", | |
| " <td>0.779370</td>\n", | |
| " <td>REG(n_informative=5)</td>\n", | |
| " <td>RandomForestRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>61</th>\n", | |
| " <td>0.124729</td>\n", | |
| " <td>0.901260</td>\n", | |
| " <td>0.907927</td>\n", | |
| " <td>0.939584</td>\n", | |
| " <td>0.163636</td>\n", | |
| " <td>REG(n_informative=5)</td>\n", | |
| " <td>RandomForestRegressor</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>62</th>\n", | |
| " <td>0.107740</td>\n", | |
| " <td>0.988112</td>\n", | |
| " <td>0.977762</td>\n", | |
| " <td>0.975555</td>\n", | |
| " <td>0.999033</td>\n", | |
| " <td>REG(n_informative=5)</td>\n", | |
| " <td>DecisionTreeRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>63</th>\n", | |
| " <td>0.179533</td>\n", | |
| " <td>0.911248</td>\n", | |
| " <td>0.942287</td>\n", | |
| " <td>0.883271</td>\n", | |
| " <td>-0.133398</td>\n", | |
| " <td>REG(n_informative=5)</td>\n", | |
| " <td>DecisionTreeRegressor</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>64</th>\n", | |
| " <td>0.367224</td>\n", | |
| " <td>0.999991</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.967209</td>\n", | |
| " <td>0.999629</td>\n", | |
| " <td>REG(n_informative=5, effective_rank=2)</td>\n", | |
| " <td>LinearRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>65</th>\n", | |
| " <td>0.238332</td>\n", | |
| " <td>0.971812</td>\n", | |
| " <td>0.959122</td>\n", | |
| " <td>0.973212</td>\n", | |
| " <td>0.211095</td>\n", | |
| " <td>REG(n_informative=5, effective_rank=2)</td>\n", | |
| " <td>LinearRegression</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>66</th>\n", | |
| " <td>0.401022</td>\n", | |
| " <td>0.999889</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.966992</td>\n", | |
| " <td>0.996933</td>\n", | |
| " <td>REG(n_informative=5, effective_rank=2)</td>\n", | |
| " <td>LinearSVR</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>67</th>\n", | |
| " <td>0.284760</td>\n", | |
| " <td>0.980323</td>\n", | |
| " <td>0.978061</td>\n", | |
| " <td>0.977250</td>\n", | |
| " <td>0.509969</td>\n", | |
| " <td>REG(n_informative=5, effective_rank=2)</td>\n", | |
| " <td>LinearSVR</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>68</th>\n", | |
| " <td>0.258505</td>\n", | |
| " <td>0.995616</td>\n", | |
| " <td>0.995906</td>\n", | |
| " <td>0.985579</td>\n", | |
| " <td>0.833975</td>\n", | |
| " <td>REG(n_informative=5, effective_rank=2)</td>\n", | |
| " <td>RandomForestRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>69</th>\n", | |
| " <td>0.297123</td>\n", | |
| " <td>0.943655</td>\n", | |
| " <td>0.944391</td>\n", | |
| " <td>0.974220</td>\n", | |
| " <td>0.068419</td>\n", | |
| " <td>REG(n_informative=5, effective_rank=2)</td>\n", | |
| " <td>RandomForestRegressor</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>70</th>\n", | |
| " <td>0.067249</td>\n", | |
| " <td>0.999453</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.993935</td>\n", | |
| " <td>0.998769</td>\n", | |
| " <td>REG(n_informative=5, effective_rank=2)</td>\n", | |
| " <td>DecisionTreeRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>71</th>\n", | |
| " <td>0.164108</td>\n", | |
| " <td>0.978136</td>\n", | |
| " <td>0.967044</td>\n", | |
| " <td>0.966069</td>\n", | |
| " <td>0.342775</td>\n", | |
| " <td>REG(n_informative=5, effective_rank=2)</td>\n", | |
| " <td>DecisionTreeRegressor</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>72</th>\n", | |
| " <td>0.452882</td>\n", | |
| " <td>0.999998</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.998365</td>\n", | |
| " <td>0.999654</td>\n", | |
| " <td>REG(n_informative=1)</td>\n", | |
| " <td>LinearRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>73</th>\n", | |
| " <td>0.314697</td>\n", | |
| " <td>0.972598</td>\n", | |
| " <td>0.969714</td>\n", | |
| " <td>0.998146</td>\n", | |
| " <td>0.065281</td>\n", | |
| " <td>REG(n_informative=1)</td>\n", | |
| " <td>LinearRegression</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>74</th>\n", | |
| " <td>0.624122</td>\n", | |
| " <td>0.992905</td>\n", | |
| " <td>0.983652</td>\n", | |
| " <td>0.959778</td>\n", | |
| " <td>0.900569</td>\n", | |
| " <td>REG(n_informative=1)</td>\n", | |
| " <td>LinearSVR</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>75</th>\n", | |
| " <td>0.516824</td>\n", | |
| " <td>0.979657</td>\n", | |
| " <td>0.973410</td>\n", | |
| " <td>0.962190</td>\n", | |
| " <td>0.541348</td>\n", | |
| " <td>REG(n_informative=1)</td>\n", | |
| " <td>LinearSVR</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>76</th>\n", | |
| " <td>0.017653</td>\n", | |
| " <td>0.999450</td>\n", | |
| " <td>0.998772</td>\n", | |
| " <td>0.999990</td>\n", | |
| " <td>0.610201</td>\n", | |
| " <td>REG(n_informative=1)</td>\n", | |
| " <td>RandomForestRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>77</th>\n", | |
| " <td>0.014198</td>\n", | |
| " <td>0.997467</td>\n", | |
| " <td>0.997146</td>\n", | |
| " <td>0.999982</td>\n", | |
| " <td>0.060978</td>\n", | |
| " <td>REG(n_informative=1)</td>\n", | |
| " <td>RandomForestRegressor</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>78</th>\n", | |
| " <td>0.003633</td>\n", | |
| " <td>0.999976</td>\n", | |
| " <td>0.999910</td>\n", | |
| " <td>0.999997</td>\n", | |
| " <td>0.997796</td>\n", | |
| " <td>REG(n_informative=1)</td>\n", | |
| " <td>DecisionTreeRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>79</th>\n", | |
| " <td>0.014089</td>\n", | |
| " <td>0.997671</td>\n", | |
| " <td>0.995892</td>\n", | |
| " <td>0.999954</td>\n", | |
| " <td>0.069229</td>\n", | |
| " <td>REG(n_informative=1)</td>\n", | |
| " <td>DecisionTreeRegressor</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>80</th>\n", | |
| " <td>0.096921</td>\n", | |
| " <td>0.999998</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.968106</td>\n", | |
| " <td>0.999678</td>\n", | |
| " <td>REG(n_informative=20)</td>\n", | |
| " <td>LinearRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>81</th>\n", | |
| " <td>0.095981</td>\n", | |
| " <td>0.989010</td>\n", | |
| " <td>0.984276</td>\n", | |
| " <td>0.932558</td>\n", | |
| " <td>0.446691</td>\n", | |
| " <td>REG(n_informative=20)</td>\n", | |
| " <td>LinearRegression</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>82</th>\n", | |
| " <td>0.120204</td>\n", | |
| " <td>0.987079</td>\n", | |
| " <td>0.973107</td>\n", | |
| " <td>0.899272</td>\n", | |
| " <td>0.822808</td>\n", | |
| " <td>REG(n_informative=20)</td>\n", | |
| " <td>LinearSVR</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>83</th>\n", | |
| " <td>0.125489</td>\n", | |
| " <td>0.987371</td>\n", | |
| " <td>0.973107</td>\n", | |
| " <td>0.904354</td>\n", | |
| " <td>0.797415</td>\n", | |
| " <td>REG(n_informative=20)</td>\n", | |
| " <td>LinearSVR</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>84</th>\n", | |
| " <td>0.108996</td>\n", | |
| " <td>0.992210</td>\n", | |
| " <td>0.994836</td>\n", | |
| " <td>0.972288</td>\n", | |
| " <td>0.876028</td>\n", | |
| " <td>REG(n_informative=20)</td>\n", | |
| " <td>RandomForestRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>85</th>\n", | |
| " <td>0.207201</td>\n", | |
| " <td>0.780758</td>\n", | |
| " <td>0.687252</td>\n", | |
| " <td>0.502312</td>\n", | |
| " <td>0.156832</td>\n", | |
| " <td>REG(n_informative=20)</td>\n", | |
| " <td>RandomForestRegressor</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>86</th>\n", | |
| " <td>0.128705</td>\n", | |
| " <td>0.936889</td>\n", | |
| " <td>0.886772</td>\n", | |
| " <td>0.939217</td>\n", | |
| " <td>0.998556</td>\n", | |
| " <td>REG(n_informative=20)</td>\n", | |
| " <td>DecisionTreeRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>87</th>\n", | |
| " <td>0.340442</td>\n", | |
| " <td>0.612293</td>\n", | |
| " <td>0.429809</td>\n", | |
| " <td>0.244711</td>\n", | |
| " <td>0.032248</td>\n", | |
| " <td>REG(n_informative=20)</td>\n", | |
| " <td>DecisionTreeRegressor</td>\n", | |
| " <td>PICV</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>88 rows × 8 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " L2 NDCG NDCG@5 Pearson SpearmanR \\\n", | |
| "0 0.672614 0.984817 0.984817 0.833270 0.632456 \n", | |
| "1 0.672541 0.984817 0.984817 0.833273 0.632456 \n", | |
| "2 0.675399 1.000000 1.000000 0.872404 0.948683 \n", | |
| "3 0.671430 0.999950 0.999950 0.873153 0.632456 \n", | |
| "4 0.282843 0.964335 0.964335 0.968496 0.816497 \n", | |
| "5 0.109003 1.000000 1.000000 0.993515 1.000000 \n", | |
| "6 0.000000 1.000000 1.000000 1.000000 1.000000 \n", | |
| "7 0.834689 0.630930 0.630930 0.357553 0.272166 \n", | |
| "8 0.167495 0.971622 0.965187 0.872985 0.908544 \n", | |
| "9 0.255409 0.973934 0.981802 0.844037 0.820918 \n", | |
| "10 0.123809 0.969013 0.932469 0.893776 0.934538 \n", | |
| "11 0.252516 0.914352 0.784295 0.622974 0.684211 \n", | |
| "12 0.322749 0.994469 0.989704 0.900801 0.885049 \n", | |
| "13 0.277373 0.946303 0.900302 0.769257 0.374577 \n", | |
| "14 0.072285 0.993702 0.970213 0.981011 0.973103 \n", | |
| "15 0.260483 0.920732 0.879966 0.741018 0.298858 \n", | |
| "16 0.273521 0.981474 0.973298 0.812503 0.817674 \n", | |
| "17 0.264550 0.946682 0.940374 0.756563 0.520301 \n", | |
| "18 0.250067 0.966972 0.913251 0.768077 0.706813 \n", | |
| "19 0.304935 0.957164 0.938046 0.744530 0.711278 \n", | |
| "20 0.275022 0.989233 0.981340 0.971646 0.749610 \n", | |
| "21 0.263122 0.955524 0.922541 0.930586 0.383459 \n", | |
| "22 0.138652 0.998331 0.998283 0.976383 0.997411 \n", | |
| "23 0.247536 0.944404 0.877370 0.928444 0.173572 \n", | |
| "24 0.336659 0.990104 0.978667 0.918085 0.875421 \n", | |
| "25 0.401469 0.963876 0.954015 0.893001 0.221249 \n", | |
| "26 0.332961 0.992544 0.973712 0.937979 0.845940 \n", | |
| "27 0.341660 0.971785 0.975554 0.895904 0.352145 \n", | |
| "28 0.578752 0.492292 0.122312 -0.288927 0.191127 \n", | |
| "29 0.460242 0.926089 0.853204 0.717342 0.405126 \n", | |
| ".. ... ... ... ... ... \n", | |
| "58 0.235361 0.991899 1.000000 0.927546 0.737489 \n", | |
| "59 0.189558 0.980900 0.999842 0.927359 0.506976 \n", | |
| "60 0.142030 0.990276 0.984540 0.980935 0.779370 \n", | |
| "61 0.124729 0.901260 0.907927 0.939584 0.163636 \n", | |
| "62 0.107740 0.988112 0.977762 0.975555 0.999033 \n", | |
| "63 0.179533 0.911248 0.942287 0.883271 -0.133398 \n", | |
| "64 0.367224 0.999991 1.000000 0.967209 0.999629 \n", | |
| "65 0.238332 0.971812 0.959122 0.973212 0.211095 \n", | |
| "66 0.401022 0.999889 1.000000 0.966992 0.996933 \n", | |
| "67 0.284760 0.980323 0.978061 0.977250 0.509969 \n", | |
| "68 0.258505 0.995616 0.995906 0.985579 0.833975 \n", | |
| "69 0.297123 0.943655 0.944391 0.974220 0.068419 \n", | |
| "70 0.067249 0.999453 1.000000 0.993935 0.998769 \n", | |
| "71 0.164108 0.978136 0.967044 0.966069 0.342775 \n", | |
| "72 0.452882 0.999998 1.000000 0.998365 0.999654 \n", | |
| "73 0.314697 0.972598 0.969714 0.998146 0.065281 \n", | |
| "74 0.624122 0.992905 0.983652 0.959778 0.900569 \n", | |
| "75 0.516824 0.979657 0.973410 0.962190 0.541348 \n", | |
| "76 0.017653 0.999450 0.998772 0.999990 0.610201 \n", | |
| "77 0.014198 0.997467 0.997146 0.999982 0.060978 \n", | |
| "78 0.003633 0.999976 0.999910 0.999997 0.997796 \n", | |
| "79 0.014089 0.997671 0.995892 0.999954 0.069229 \n", | |
| "80 0.096921 0.999998 1.000000 0.968106 0.999678 \n", | |
| "81 0.095981 0.989010 0.984276 0.932558 0.446691 \n", | |
| "82 0.120204 0.987079 0.973107 0.899272 0.822808 \n", | |
| "83 0.125489 0.987371 0.973107 0.904354 0.797415 \n", | |
| "84 0.108996 0.992210 0.994836 0.972288 0.876028 \n", | |
| "85 0.207201 0.780758 0.687252 0.502312 0.156832 \n", | |
| "86 0.128705 0.936889 0.886772 0.939217 0.998556 \n", | |
| "87 0.340442 0.612293 0.429809 0.244711 0.032248 \n", | |
| "\n", | |
| " dataset estimator type \n", | |
| "0 iris_binary LogisticRegression PI \n", | |
| "1 iris_binary LogisticRegression PICV \n", | |
| "2 iris_binary LinearSVC PI \n", | |
| "3 iris_binary LinearSVC PICV \n", | |
| "4 iris_binary RandomForestClassifier PI \n", | |
| "5 iris_binary RandomForestClassifier PICV \n", | |
| "6 iris_binary DecisionTreeClassifier PI \n", | |
| "7 iris_binary DecisionTreeClassifier PICV \n", | |
| "8 CLF(n_informative=5, n_redundant=0) LogisticRegression PI \n", | |
| "9 CLF(n_informative=5, n_redundant=0) LogisticRegression PICV \n", | |
| "10 CLF(n_informative=5, n_redundant=0) LinearSVC PI \n", | |
| "11 CLF(n_informative=5, n_redundant=0) LinearSVC PICV \n", | |
| "12 CLF(n_informative=5, n_redundant=0) RandomForestClassifier PI \n", | |
| "13 CLF(n_informative=5, n_redundant=0) RandomForestClassifier PICV \n", | |
| "14 CLF(n_informative=5, n_redundant=0) DecisionTreeClassifier PI \n", | |
| "15 CLF(n_informative=5, n_redundant=0) DecisionTreeClassifier PICV \n", | |
| "16 CLF(n_informative=5, n_redundant=4) LogisticRegression PI \n", | |
| "17 CLF(n_informative=5, n_redundant=4) LogisticRegression PICV \n", | |
| "18 CLF(n_informative=5, n_redundant=4) LinearSVC PI \n", | |
| "19 CLF(n_informative=5, n_redundant=4) LinearSVC PICV \n", | |
| "20 CLF(n_informative=5, n_redundant=4) RandomForestClassifier PI \n", | |
| "21 CLF(n_informative=5, n_redundant=4) RandomForestClassifier PICV \n", | |
| "22 CLF(n_informative=5, n_redundant=4) DecisionTreeClassifier PI \n", | |
| "23 CLF(n_informative=5, n_redundant=4) DecisionTreeClassifier PICV \n", | |
| "24 CLF(n_informative=1, n_redundant=4) LogisticRegression PI \n", | |
| "25 CLF(n_informative=1, n_redundant=4) LogisticRegression PICV \n", | |
| "26 CLF(n_informative=1, n_redundant=4) LinearSVC PI \n", | |
| "27 CLF(n_informative=1, n_redundant=4) LinearSVC PICV \n", | |
| "28 CLF(n_informative=1, n_redundant=4) RandomForestClassifier PI \n", | |
| "29 CLF(n_informative=1, n_redundant=4) RandomForestClassifier PICV \n", | |
| ".. ... ... ... \n", | |
| "58 REG(n_informative=5) LinearSVR PI \n", | |
| "59 REG(n_informative=5) LinearSVR PICV \n", | |
| "60 REG(n_informative=5) RandomForestRegressor PI \n", | |
| "61 REG(n_informative=5) RandomForestRegressor PICV \n", | |
| "62 REG(n_informative=5) DecisionTreeRegressor PI \n", | |
| "63 REG(n_informative=5) DecisionTreeRegressor PICV \n", | |
| "64 REG(n_informative=5, effective_rank=2) LinearRegression PI \n", | |
| "65 REG(n_informative=5, effective_rank=2) LinearRegression PICV \n", | |
| "66 REG(n_informative=5, effective_rank=2) LinearSVR PI \n", | |
| "67 REG(n_informative=5, effective_rank=2) LinearSVR PICV \n", | |
| "68 REG(n_informative=5, effective_rank=2) RandomForestRegressor PI \n", | |
| "69 REG(n_informative=5, effective_rank=2) RandomForestRegressor PICV \n", | |
| "70 REG(n_informative=5, effective_rank=2) DecisionTreeRegressor PI \n", | |
| "71 REG(n_informative=5, effective_rank=2) DecisionTreeRegressor PICV \n", | |
| "72 REG(n_informative=1) LinearRegression PI \n", | |
| "73 REG(n_informative=1) LinearRegression PICV \n", | |
| "74 REG(n_informative=1) LinearSVR PI \n", | |
| "75 REG(n_informative=1) LinearSVR PICV \n", | |
| "76 REG(n_informative=1) RandomForestRegressor PI \n", | |
| "77 REG(n_informative=1) RandomForestRegressor PICV \n", | |
| "78 REG(n_informative=1) DecisionTreeRegressor PI \n", | |
| "79 REG(n_informative=1) DecisionTreeRegressor PICV \n", | |
| "80 REG(n_informative=20) LinearRegression PI \n", | |
| "81 REG(n_informative=20) LinearRegression PICV \n", | |
| "82 REG(n_informative=20) LinearSVR PI \n", | |
| "83 REG(n_informative=20) LinearSVR PICV \n", | |
| "84 REG(n_informative=20) RandomForestRegressor PI \n", | |
| "85 REG(n_informative=20) RandomForestRegressor PICV \n", | |
| "86 REG(n_informative=20) DecisionTreeRegressor PI \n", | |
| "87 REG(n_informative=20) DecisionTreeRegressor PICV \n", | |
| "\n", | |
| "[88 rows x 8 columns]" | |
| ] | |
| }, | |
| "execution_count": 67, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df = pd.DataFrame([s[3] for s in scores])\n", | |
| "df = df.assign(\n", | |
| " estimator=[s[0] for s in scores],\n", | |
| " dataset=[s[1] for s in scores],\n", | |
| " type=[s[2] for s in scores],\n", | |
| ")\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 68, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>L2</th>\n", | |
| " <th>NDCG</th>\n", | |
| " <th>NDCG@5</th>\n", | |
| " <th>Pearson</th>\n", | |
| " <th>SpearmanR</th>\n", | |
| " <th>dataset</th>\n", | |
| " <th>estimator</th>\n", | |
| " <th>type</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0.672614</td>\n", | |
| " <td>0.984817</td>\n", | |
| " <td>0.984817</td>\n", | |
| " <td>0.833270</td>\n", | |
| " <td>0.632456</td>\n", | |
| " <td>iris_binary</td>\n", | |
| " <td>LogisticRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0.675399</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.872404</td>\n", | |
| " <td>0.948683</td>\n", | |
| " <td>iris_binary</td>\n", | |
| " <td>LinearSVC</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0.282843</td>\n", | |
| " <td>0.964335</td>\n", | |
| " <td>0.964335</td>\n", | |
| " <td>0.968496</td>\n", | |
| " <td>0.816497</td>\n", | |
| " <td>iris_binary</td>\n", | |
| " <td>RandomForestClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>0.000000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>iris_binary</td>\n", | |
| " <td>DecisionTreeClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>0.167495</td>\n", | |
| " <td>0.971622</td>\n", | |
| " <td>0.965187</td>\n", | |
| " <td>0.872985</td>\n", | |
| " <td>0.908544</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=0)</td>\n", | |
| " <td>LogisticRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>0.123809</td>\n", | |
| " <td>0.969013</td>\n", | |
| " <td>0.932469</td>\n", | |
| " <td>0.893776</td>\n", | |
| " <td>0.934538</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=0)</td>\n", | |
| " <td>LinearSVC</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>0.322749</td>\n", | |
| " <td>0.994469</td>\n", | |
| " <td>0.989704</td>\n", | |
| " <td>0.900801</td>\n", | |
| " <td>0.885049</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=0)</td>\n", | |
| " <td>RandomForestClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>14</th>\n", | |
| " <td>0.072285</td>\n", | |
| " <td>0.993702</td>\n", | |
| " <td>0.970213</td>\n", | |
| " <td>0.981011</td>\n", | |
| " <td>0.973103</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=0)</td>\n", | |
| " <td>DecisionTreeClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>16</th>\n", | |
| " <td>0.273521</td>\n", | |
| " <td>0.981474</td>\n", | |
| " <td>0.973298</td>\n", | |
| " <td>0.812503</td>\n", | |
| " <td>0.817674</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=4)</td>\n", | |
| " <td>LogisticRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>18</th>\n", | |
| " <td>0.250067</td>\n", | |
| " <td>0.966972</td>\n", | |
| " <td>0.913251</td>\n", | |
| " <td>0.768077</td>\n", | |
| " <td>0.706813</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=4)</td>\n", | |
| " <td>LinearSVC</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>20</th>\n", | |
| " <td>0.275022</td>\n", | |
| " <td>0.989233</td>\n", | |
| " <td>0.981340</td>\n", | |
| " <td>0.971646</td>\n", | |
| " <td>0.749610</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=4)</td>\n", | |
| " <td>RandomForestClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>22</th>\n", | |
| " <td>0.138652</td>\n", | |
| " <td>0.998331</td>\n", | |
| " <td>0.998283</td>\n", | |
| " <td>0.976383</td>\n", | |
| " <td>0.997411</td>\n", | |
| " <td>CLF(n_informative=5, n_redundant=4)</td>\n", | |
| " <td>DecisionTreeClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>24</th>\n", | |
| " <td>0.336659</td>\n", | |
| " <td>0.990104</td>\n", | |
| " <td>0.978667</td>\n", | |
| " <td>0.918085</td>\n", | |
| " <td>0.875421</td>\n", | |
| " <td>CLF(n_informative=1, n_redundant=4)</td>\n", | |
| " <td>LogisticRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>26</th>\n", | |
| " <td>0.332961</td>\n", | |
| " <td>0.992544</td>\n", | |
| " <td>0.973712</td>\n", | |
| " <td>0.937979</td>\n", | |
| " <td>0.845940</td>\n", | |
| " <td>CLF(n_informative=1, n_redundant=4)</td>\n", | |
| " <td>LinearSVC</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>28</th>\n", | |
| " <td>0.578752</td>\n", | |
| " <td>0.492292</td>\n", | |
| " <td>0.122312</td>\n", | |
| " <td>-0.288927</td>\n", | |
| " <td>0.191127</td>\n", | |
| " <td>CLF(n_informative=1, n_redundant=4)</td>\n", | |
| " <td>RandomForestClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>30</th>\n", | |
| " <td>0.015446</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.999899</td>\n", | |
| " <td>0.999027</td>\n", | |
| " <td>CLF(n_informative=1, n_redundant=4)</td>\n", | |
| " <td>DecisionTreeClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>32</th>\n", | |
| " <td>0.173657</td>\n", | |
| " <td>0.979462</td>\n", | |
| " <td>0.964900</td>\n", | |
| " <td>0.877614</td>\n", | |
| " <td>0.700077</td>\n", | |
| " <td>CLF(n_informative=20, n_redundant=0)</td>\n", | |
| " <td>LogisticRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>34</th>\n", | |
| " <td>0.252788</td>\n", | |
| " <td>0.973859</td>\n", | |
| " <td>0.968925</td>\n", | |
| " <td>0.768863</td>\n", | |
| " <td>0.580477</td>\n", | |
| " <td>CLF(n_informative=20, n_redundant=0)</td>\n", | |
| " <td>LinearSVC</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>36</th>\n", | |
| " <td>0.140510</td>\n", | |
| " <td>0.938507</td>\n", | |
| " <td>0.768774</td>\n", | |
| " <td>0.672920</td>\n", | |
| " <td>0.654109</td>\n", | |
| " <td>CLF(n_informative=20, n_redundant=0)</td>\n", | |
| " <td>RandomForestClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>38</th>\n", | |
| " <td>0.109475</td>\n", | |
| " <td>0.995163</td>\n", | |
| " <td>0.979534</td>\n", | |
| " <td>0.959566</td>\n", | |
| " <td>0.981116</td>\n", | |
| " <td>CLF(n_informative=20, n_redundant=0)</td>\n", | |
| " <td>DecisionTreeClassifier</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>40</th>\n", | |
| " <td>0.242637</td>\n", | |
| " <td>0.999890</td>\n", | |
| " <td>0.999311</td>\n", | |
| " <td>0.952595</td>\n", | |
| " <td>0.989011</td>\n", | |
| " <td>boston</td>\n", | |
| " <td>LinearRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>42</th>\n", | |
| " <td>0.318675</td>\n", | |
| " <td>0.997746</td>\n", | |
| " <td>0.994291</td>\n", | |
| " <td>0.950851</td>\n", | |
| " <td>0.972527</td>\n", | |
| " <td>boston</td>\n", | |
| " <td>LinearSVR</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>44</th>\n", | |
| " <td>0.034486</td>\n", | |
| " <td>0.999036</td>\n", | |
| " <td>0.999015</td>\n", | |
| " <td>0.997868</td>\n", | |
| " <td>0.989011</td>\n", | |
| " <td>boston</td>\n", | |
| " <td>RandomForestRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>46</th>\n", | |
| " <td>0.177042</td>\n", | |
| " <td>0.999770</td>\n", | |
| " <td>0.999955</td>\n", | |
| " <td>0.973999</td>\n", | |
| " <td>0.967033</td>\n", | |
| " <td>boston</td>\n", | |
| " <td>DecisionTreeRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>48</th>\n", | |
| " <td>0.292793</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.967100</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>diabetese</td>\n", | |
| " <td>LinearRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>50</th>\n", | |
| " <td>0.352574</td>\n", | |
| " <td>0.985625</td>\n", | |
| " <td>0.984779</td>\n", | |
| " <td>0.954882</td>\n", | |
| " <td>0.951515</td>\n", | |
| " <td>diabetese</td>\n", | |
| " <td>LinearSVR</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>52</th>\n", | |
| " <td>0.057059</td>\n", | |
| " <td>0.999937</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.991997</td>\n", | |
| " <td>0.951515</td>\n", | |
| " <td>diabetese</td>\n", | |
| " <td>RandomForestRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>54</th>\n", | |
| " <td>0.120583</td>\n", | |
| " <td>0.927134</td>\n", | |
| " <td>0.916092</td>\n", | |
| " <td>0.929929</td>\n", | |
| " <td>0.915152</td>\n", | |
| " <td>diabetese</td>\n", | |
| " <td>DecisionTreeRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>56</th>\n", | |
| " <td>0.159032</td>\n", | |
| " <td>0.999994</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.983486</td>\n", | |
| " <td>0.999579</td>\n", | |
| " <td>REG(n_informative=5)</td>\n", | |
| " <td>LinearRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>58</th>\n", | |
| " <td>0.235361</td>\n", | |
| " <td>0.991899</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.927546</td>\n", | |
| " <td>0.737489</td>\n", | |
| " <td>REG(n_informative=5)</td>\n", | |
| " <td>LinearSVR</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>60</th>\n", | |
| " <td>0.142030</td>\n", | |
| " <td>0.990276</td>\n", | |
| " <td>0.984540</td>\n", | |
| " <td>0.980935</td>\n", | |
| " <td>0.779370</td>\n", | |
| " <td>REG(n_informative=5)</td>\n", | |
| " <td>RandomForestRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>62</th>\n", | |
| " <td>0.107740</td>\n", | |
| " <td>0.988112</td>\n", | |
| " <td>0.977762</td>\n", | |
| " <td>0.975555</td>\n", | |
| " <td>0.999033</td>\n", | |
| " <td>REG(n_informative=5)</td>\n", | |
| " <td>DecisionTreeRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>64</th>\n", | |
| " <td>0.367224</td>\n", | |
| " <td>0.999991</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.967209</td>\n", | |
| " <td>0.999629</td>\n", | |
| " <td>REG(n_informative=5, effective_rank=2)</td>\n", | |
| " <td>LinearRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>66</th>\n", | |
| " <td>0.401022</td>\n", | |
| " <td>0.999889</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.966992</td>\n", | |
| " <td>0.996933</td>\n", | |
| " <td>REG(n_informative=5, effective_rank=2)</td>\n", | |
| " <td>LinearSVR</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>68</th>\n", | |
| " <td>0.258505</td>\n", | |
| " <td>0.995616</td>\n", | |
| " <td>0.995906</td>\n", | |
| " <td>0.985579</td>\n", | |
| " <td>0.833975</td>\n", | |
| " <td>REG(n_informative=5, effective_rank=2)</td>\n", | |
| " <td>RandomForestRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>70</th>\n", | |
| " <td>0.067249</td>\n", | |
| " <td>0.999453</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.993935</td>\n", | |
| " <td>0.998769</td>\n", | |
| " <td>REG(n_informative=5, effective_rank=2)</td>\n", | |
| " <td>DecisionTreeRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>72</th>\n", | |
| " <td>0.452882</td>\n", | |
| " <td>0.999998</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.998365</td>\n", | |
| " <td>0.999654</td>\n", | |
| " <td>REG(n_informative=1)</td>\n", | |
| " <td>LinearRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>74</th>\n", | |
| " <td>0.624122</td>\n", | |
| " <td>0.992905</td>\n", | |
| " <td>0.983652</td>\n", | |
| " <td>0.959778</td>\n", | |
| " <td>0.900569</td>\n", | |
| " <td>REG(n_informative=1)</td>\n", | |
| " <td>LinearSVR</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>76</th>\n", | |
| " <td>0.017653</td>\n", | |
| " <td>0.999450</td>\n", | |
| " <td>0.998772</td>\n", | |
| " <td>0.999990</td>\n", | |
| " <td>0.610201</td>\n", | |
| " <td>REG(n_informative=1)</td>\n", | |
| " <td>RandomForestRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>78</th>\n", | |
| " <td>0.003633</td>\n", | |
| " <td>0.999976</td>\n", | |
| " <td>0.999910</td>\n", | |
| " <td>0.999997</td>\n", | |
| " <td>0.997796</td>\n", | |
| " <td>REG(n_informative=1)</td>\n", | |
| " <td>DecisionTreeRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>80</th>\n", | |
| " <td>0.096921</td>\n", | |
| " <td>0.999998</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.968106</td>\n", | |
| " <td>0.999678</td>\n", | |
| " <td>REG(n_informative=20)</td>\n", | |
| " <td>LinearRegression</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>82</th>\n", | |
| " <td>0.120204</td>\n", | |
| " <td>0.987079</td>\n", | |
| " <td>0.973107</td>\n", | |
| " <td>0.899272</td>\n", | |
| " <td>0.822808</td>\n", | |
| " <td>REG(n_informative=20)</td>\n", | |
| " <td>LinearSVR</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>84</th>\n", | |
| " <td>0.108996</td>\n", | |
| " <td>0.992210</td>\n", | |
| " <td>0.994836</td>\n", | |
| " <td>0.972288</td>\n", | |
| " <td>0.876028</td>\n", | |
| " <td>REG(n_informative=20)</td>\n", | |
| " <td>RandomForestRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>86</th>\n", | |
| " <td>0.128705</td>\n", | |
| " <td>0.936889</td>\n", | |
| " <td>0.886772</td>\n", | |
| " <td>0.939217</td>\n", | |
| " <td>0.998556</td>\n", | |
| " <td>REG(n_informative=20)</td>\n", | |
| " <td>DecisionTreeRegressor</td>\n", | |
| " <td>PI</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " L2 NDCG NDCG@5 Pearson SpearmanR \\\n", | |
| "0 0.672614 0.984817 0.984817 0.833270 0.632456 \n", | |
| "2 0.675399 1.000000 1.000000 0.872404 0.948683 \n", | |
| "4 0.282843 0.964335 0.964335 0.968496 0.816497 \n", | |
| "6 0.000000 1.000000 1.000000 1.000000 1.000000 \n", | |
| "8 0.167495 0.971622 0.965187 0.872985 0.908544 \n", | |
| "10 0.123809 0.969013 0.932469 0.893776 0.934538 \n", | |
| "12 0.322749 0.994469 0.989704 0.900801 0.885049 \n", | |
| "14 0.072285 0.993702 0.970213 0.981011 0.973103 \n", | |
| "16 0.273521 0.981474 0.973298 0.812503 0.817674 \n", | |
| "18 0.250067 0.966972 0.913251 0.768077 0.706813 \n", | |
| "20 0.275022 0.989233 0.981340 0.971646 0.749610 \n", | |
| "22 0.138652 0.998331 0.998283 0.976383 0.997411 \n", | |
| "24 0.336659 0.990104 0.978667 0.918085 0.875421 \n", | |
| "26 0.332961 0.992544 0.973712 0.937979 0.845940 \n", | |
| "28 0.578752 0.492292 0.122312 -0.288927 0.191127 \n", | |
| "30 0.015446 1.000000 1.000000 0.999899 0.999027 \n", | |
| "32 0.173657 0.979462 0.964900 0.877614 0.700077 \n", | |
| "34 0.252788 0.973859 0.968925 0.768863 0.580477 \n", | |
| "36 0.140510 0.938507 0.768774 0.672920 0.654109 \n", | |
| "38 0.109475 0.995163 0.979534 0.959566 0.981116 \n", | |
| "40 0.242637 0.999890 0.999311 0.952595 0.989011 \n", | |
| "42 0.318675 0.997746 0.994291 0.950851 0.972527 \n", | |
| "44 0.034486 0.999036 0.999015 0.997868 0.989011 \n", | |
| "46 0.177042 0.999770 0.999955 0.973999 0.967033 \n", | |
| "48 0.292793 1.000000 1.000000 0.967100 1.000000 \n", | |
| "50 0.352574 0.985625 0.984779 0.954882 0.951515 \n", | |
| "52 0.057059 0.999937 1.000000 0.991997 0.951515 \n", | |
| "54 0.120583 0.927134 0.916092 0.929929 0.915152 \n", | |
| "56 0.159032 0.999994 1.000000 0.983486 0.999579 \n", | |
| "58 0.235361 0.991899 1.000000 0.927546 0.737489 \n", | |
| "60 0.142030 0.990276 0.984540 0.980935 0.779370 \n", | |
| "62 0.107740 0.988112 0.977762 0.975555 0.999033 \n", | |
| "64 0.367224 0.999991 1.000000 0.967209 0.999629 \n", | |
| "66 0.401022 0.999889 1.000000 0.966992 0.996933 \n", | |
| "68 0.258505 0.995616 0.995906 0.985579 0.833975 \n", | |
| "70 0.067249 0.999453 1.000000 0.993935 0.998769 \n", | |
| "72 0.452882 0.999998 1.000000 0.998365 0.999654 \n", | |
| "74 0.624122 0.992905 0.983652 0.959778 0.900569 \n", | |
| "76 0.017653 0.999450 0.998772 0.999990 0.610201 \n", | |
| "78 0.003633 0.999976 0.999910 0.999997 0.997796 \n", | |
| "80 0.096921 0.999998 1.000000 0.968106 0.999678 \n", | |
| "82 0.120204 0.987079 0.973107 0.899272 0.822808 \n", | |
| "84 0.108996 0.992210 0.994836 0.972288 0.876028 \n", | |
| "86 0.128705 0.936889 0.886772 0.939217 0.998556 \n", | |
| "\n", | |
| " dataset estimator type \n", | |
| "0 iris_binary LogisticRegression PI \n", | |
| "2 iris_binary LinearSVC PI \n", | |
| "4 iris_binary RandomForestClassifier PI \n", | |
| "6 iris_binary DecisionTreeClassifier PI \n", | |
| "8 CLF(n_informative=5, n_redundant=0) LogisticRegression PI \n", | |
| "10 CLF(n_informative=5, n_redundant=0) LinearSVC PI \n", | |
| "12 CLF(n_informative=5, n_redundant=0) RandomForestClassifier PI \n", | |
| "14 CLF(n_informative=5, n_redundant=0) DecisionTreeClassifier PI \n", | |
| "16 CLF(n_informative=5, n_redundant=4) LogisticRegression PI \n", | |
| "18 CLF(n_informative=5, n_redundant=4) LinearSVC PI \n", | |
| "20 CLF(n_informative=5, n_redundant=4) RandomForestClassifier PI \n", | |
| "22 CLF(n_informative=5, n_redundant=4) DecisionTreeClassifier PI \n", | |
| "24 CLF(n_informative=1, n_redundant=4) LogisticRegression PI \n", | |
| "26 CLF(n_informative=1, n_redundant=4) LinearSVC PI \n", | |
| "28 CLF(n_informative=1, n_redundant=4) RandomForestClassifier PI \n", | |
| "30 CLF(n_informative=1, n_redundant=4) DecisionTreeClassifier PI \n", | |
| "32 CLF(n_informative=20, n_redundant=0) LogisticRegression PI \n", | |
| "34 CLF(n_informative=20, n_redundant=0) LinearSVC PI \n", | |
| "36 CLF(n_informative=20, n_redundant=0) RandomForestClassifier PI \n", | |
| "38 CLF(n_informative=20, n_redundant=0) DecisionTreeClassifier PI \n", | |
| "40 boston LinearRegression PI \n", | |
| "42 boston LinearSVR PI \n", | |
| "44 boston RandomForestRegressor PI \n", | |
| "46 boston DecisionTreeRegressor PI \n", | |
| "48 diabetese LinearRegression PI \n", | |
| "50 diabetese LinearSVR PI \n", | |
| "52 diabetese RandomForestRegressor PI \n", | |
| "54 diabetese DecisionTreeRegressor PI \n", | |
| "56 REG(n_informative=5) LinearRegression PI \n", | |
| "58 REG(n_informative=5) LinearSVR PI \n", | |
| "60 REG(n_informative=5) RandomForestRegressor PI \n", | |
| "62 REG(n_informative=5) DecisionTreeRegressor PI \n", | |
| "64 REG(n_informative=5, effective_rank=2) LinearRegression PI \n", | |
| "66 REG(n_informative=5, effective_rank=2) LinearSVR PI \n", | |
| "68 REG(n_informative=5, effective_rank=2) RandomForestRegressor PI \n", | |
| "70 REG(n_informative=5, effective_rank=2) DecisionTreeRegressor PI \n", | |
| "72 REG(n_informative=1) LinearRegression PI \n", | |
| "74 REG(n_informative=1) LinearSVR PI \n", | |
| "76 REG(n_informative=1) RandomForestRegressor PI \n", | |
| "78 REG(n_informative=1) DecisionTreeRegressor PI \n", | |
| "80 REG(n_informative=20) LinearRegression PI \n", | |
| "82 REG(n_informative=20) LinearSVR PI \n", | |
| "84 REG(n_informative=20) RandomForestRegressor PI \n", | |
| "86 REG(n_informative=20) DecisionTreeRegressor PI " | |
| ] | |
| }, | |
| "execution_count": 68, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df_pi = df[df.type==\"PI\"]\n", | |
| "df_pi" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 71, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>L2</th>\n", | |
| " <th>NDCG</th>\n", | |
| " <th>NDCG@5</th>\n", | |
| " <th>Pearson</th>\n", | |
| " <th>SpearmanR</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>estimator</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>DecisionTreeClassifier</th>\n", | |
| " <td>0.067172</td>\n", | |
| " <td>0.997439</td>\n", | |
| " <td>0.989606</td>\n", | |
| " <td>0.983372</td>\n", | |
| " <td>0.990131</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>DecisionTreeRegressor</th>\n", | |
| " <td>0.100825</td>\n", | |
| " <td>0.975222</td>\n", | |
| " <td>0.963415</td>\n", | |
| " <td>0.968772</td>\n", | |
| " <td>0.979390</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>LinearRegression</th>\n", | |
| " <td>0.268582</td>\n", | |
| " <td>0.999978</td>\n", | |
| " <td>0.999885</td>\n", | |
| " <td>0.972810</td>\n", | |
| " <td>0.997925</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>LinearSVC</th>\n", | |
| " <td>0.327005</td>\n", | |
| " <td>0.980478</td>\n", | |
| " <td>0.957671</td>\n", | |
| " <td>0.848220</td>\n", | |
| " <td>0.803290</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>LinearSVR</th>\n", | |
| " <td>0.341993</td>\n", | |
| " <td>0.992524</td>\n", | |
| " <td>0.989305</td>\n", | |
| " <td>0.943220</td>\n", | |
| " <td>0.896974</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>LogisticRegression</th>\n", | |
| " <td>0.324789</td>\n", | |
| " <td>0.981496</td>\n", | |
| " <td>0.973374</td>\n", | |
| " <td>0.862891</td>\n", | |
| " <td>0.786834</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>RandomForestClassifier</th>\n", | |
| " <td>0.319975</td>\n", | |
| " <td>0.875767</td>\n", | |
| " <td>0.765293</td>\n", | |
| " <td>0.644987</td>\n", | |
| " <td>0.659278</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>RandomForestRegressor</th>\n", | |
| " <td>0.103122</td>\n", | |
| " <td>0.996088</td>\n", | |
| " <td>0.995512</td>\n", | |
| " <td>0.988109</td>\n", | |
| " <td>0.840017</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " L2 NDCG NDCG@5 Pearson SpearmanR\n", | |
| "estimator \n", | |
| "DecisionTreeClassifier 0.067172 0.997439 0.989606 0.983372 0.990131\n", | |
| "DecisionTreeRegressor 0.100825 0.975222 0.963415 0.968772 0.979390\n", | |
| "LinearRegression 0.268582 0.999978 0.999885 0.972810 0.997925\n", | |
| "LinearSVC 0.327005 0.980478 0.957671 0.848220 0.803290\n", | |
| "LinearSVR 0.341993 0.992524 0.989305 0.943220 0.896974\n", | |
| "LogisticRegression 0.324789 0.981496 0.973374 0.862891 0.786834\n", | |
| "RandomForestClassifier 0.319975 0.875767 0.765293 0.644987 0.659278\n", | |
| "RandomForestRegressor 0.103122 0.996088 0.995512 0.988109 0.840017" | |
| ] | |
| }, | |
| "execution_count": 71, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df_pi.groupby('estimator').mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 72, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style>\n", | |
| " .dataframe thead tr:only-child th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: left;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>L2</th>\n", | |
| " <th>NDCG</th>\n", | |
| " <th>NDCG@5</th>\n", | |
| " <th>Pearson</th>\n", | |
| " <th>SpearmanR</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>dataset</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>CLF(n_informative=1, n_redundant=4)</th>\n", | |
| " <td>0.315954</td>\n", | |
| " <td>0.868735</td>\n", | |
| " <td>0.768673</td>\n", | |
| " <td>0.641759</td>\n", | |
| " <td>0.727879</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>CLF(n_informative=20, n_redundant=0)</th>\n", | |
| " <td>0.169107</td>\n", | |
| " <td>0.971748</td>\n", | |
| " <td>0.920533</td>\n", | |
| " <td>0.819741</td>\n", | |
| " <td>0.728945</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>CLF(n_informative=5, n_redundant=0)</th>\n", | |
| " <td>0.171585</td>\n", | |
| " <td>0.982201</td>\n", | |
| " <td>0.964393</td>\n", | |
| " <td>0.912143</td>\n", | |
| " <td>0.925309</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>CLF(n_informative=5, n_redundant=4)</th>\n", | |
| " <td>0.234315</td>\n", | |
| " <td>0.984003</td>\n", | |
| " <td>0.966543</td>\n", | |
| " <td>0.882152</td>\n", | |
| " <td>0.817877</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>REG(n_informative=1)</th>\n", | |
| " <td>0.274572</td>\n", | |
| " <td>0.998082</td>\n", | |
| " <td>0.995583</td>\n", | |
| " <td>0.989532</td>\n", | |
| " <td>0.877055</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>REG(n_informative=20)</th>\n", | |
| " <td>0.113707</td>\n", | |
| " <td>0.979044</td>\n", | |
| " <td>0.963679</td>\n", | |
| " <td>0.944721</td>\n", | |
| " <td>0.924267</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>REG(n_informative=5)</th>\n", | |
| " <td>0.161041</td>\n", | |
| " <td>0.992570</td>\n", | |
| " <td>0.990575</td>\n", | |
| " <td>0.966880</td>\n", | |
| " <td>0.878868</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>REG(n_informative=5, effective_rank=2)</th>\n", | |
| " <td>0.273500</td>\n", | |
| " <td>0.998737</td>\n", | |
| " <td>0.998977</td>\n", | |
| " <td>0.978429</td>\n", | |
| " <td>0.957326</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>boston</th>\n", | |
| " <td>0.193210</td>\n", | |
| " <td>0.999111</td>\n", | |
| " <td>0.998143</td>\n", | |
| " <td>0.968828</td>\n", | |
| " <td>0.979396</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>diabetese</th>\n", | |
| " <td>0.205752</td>\n", | |
| " <td>0.978174</td>\n", | |
| " <td>0.975218</td>\n", | |
| " <td>0.960977</td>\n", | |
| " <td>0.954545</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>iris_binary</th>\n", | |
| " <td>0.407714</td>\n", | |
| " <td>0.987288</td>\n", | |
| " <td>0.987288</td>\n", | |
| " <td>0.918542</td>\n", | |
| " <td>0.849409</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " L2 NDCG NDCG@5 \\\n", | |
| "dataset \n", | |
| "CLF(n_informative=1, n_redundant=4) 0.315954 0.868735 0.768673 \n", | |
| "CLF(n_informative=20, n_redundant=0) 0.169107 0.971748 0.920533 \n", | |
| "CLF(n_informative=5, n_redundant=0) 0.171585 0.982201 0.964393 \n", | |
| "CLF(n_informative=5, n_redundant=4) 0.234315 0.984003 0.966543 \n", | |
| "REG(n_informative=1) 0.274572 0.998082 0.995583 \n", | |
| "REG(n_informative=20) 0.113707 0.979044 0.963679 \n", | |
| "REG(n_informative=5) 0.161041 0.992570 0.990575 \n", | |
| "REG(n_informative=5, effective_rank=2) 0.273500 0.998737 0.998977 \n", | |
| "boston 0.193210 0.999111 0.998143 \n", | |
| "diabetese 0.205752 0.978174 0.975218 \n", | |
| "iris_binary 0.407714 0.987288 0.987288 \n", | |
| "\n", | |
| " Pearson SpearmanR \n", | |
| "dataset \n", | |
| "CLF(n_informative=1, n_redundant=4) 0.641759 0.727879 \n", | |
| "CLF(n_informative=20, n_redundant=0) 0.819741 0.728945 \n", | |
| "CLF(n_informative=5, n_redundant=0) 0.912143 0.925309 \n", | |
| "CLF(n_informative=5, n_redundant=4) 0.882152 0.817877 \n", | |
| "REG(n_informative=1) 0.989532 0.877055 \n", | |
| "REG(n_informative=20) 0.944721 0.924267 \n", | |
| "REG(n_informative=5) 0.966880 0.878868 \n", | |
| "REG(n_informative=5, effective_rank=2) 0.978429 0.957326 \n", | |
| "boston 0.968828 0.979396 \n", | |
| "diabetese 0.960977 0.954545 \n", | |
| "iris_binary 0.918542 0.849409 " | |
| ] | |
| }, | |
| "execution_count": 72, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df_pi.groupby('dataset').mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.1" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment