Last active
September 4, 2020 17:52
-
-
Save drcjar/836cdd0d4cfac42fc427d1700648dc78 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import seaborn as sns\n", | |
| "import statsmodels.formula.api as smf\n", | |
| "%matplotlib inline\n", | |
| "from scipy.stats import scoreatpercentile\n", | |
| "import math\n", | |
| "import numpy as np\n", | |
| "\n", | |
| "pd.options.display.float_format = '{:,.2f}'.format\n", | |
| "\n", | |
| "import scipy.stats as stats\n", | |
| "stats.chisqprob = lambda chisq, df: stats.chi2.sf(chisq, df)\n", | |
| "\n", | |
| "# jobs_dataframe.csv\n", | |
| "# job_tasks_dataframe.csv\n", | |
| "# flat_dataframe.csv" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df = pd.read_csv('flat_dataframe.csv')\n", | |
| "reorderjobcat = {1.:5 , 2.2:4, 3.:3 , 2.1:4, 2.3:4, 5.:1 , 4.:2}\n", | |
| "df['lowest_peto_cat_reordered'] = df.lowest_peto_cat.map(reorderjobcat) # make it so highest n is highest exposed\n", | |
| "df['median_ssec_int'] = df.median_ssec.astype(int)\n", | |
| "\n", | |
| "df.to_csv('flat_data_sans_genotype.csv', index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# table one" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>count</th>\n", | |
| " <th>mean</th>\n", | |
| " <th>std</th>\n", | |
| " <th>min</th>\n", | |
| " <th>25%</th>\n", | |
| " <th>50%</th>\n", | |
| " <th>75%</th>\n", | |
| " <th>max</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>case</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>466.00</td>\n", | |
| " <td>74.42</td>\n", | |
| " <td>8.53</td>\n", | |
| " <td>34.00</td>\n", | |
| " <td>69.00</td>\n", | |
| " <td>75.00</td>\n", | |
| " <td>80.00</td>\n", | |
| " <td>95.00</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>494.00</td>\n", | |
| " <td>76.59</td>\n", | |
| " <td>7.84</td>\n", | |
| " <td>53.00</td>\n", | |
| " <td>72.00</td>\n", | |
| " <td>77.00</td>\n", | |
| " <td>82.00</td>\n", | |
| " <td>95.00</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " count mean std min 25% 50% 75% max\n", | |
| "case \n", | |
| "0 466.00 74.42 8.53 34.00 69.00 75.00 80.00 95.00\n", | |
| "1 494.00 76.59 7.84 53.00 72.00 77.00 82.00 95.00" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.groupby('case').age.describe()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "case ethnicity \n", | |
| "0 White 449\n", | |
| " Asian / Asian British 8\n", | |
| " Black / African/ Caribbean/ Black British 7\n", | |
| " Other ethnic group 2\n", | |
| "1 White 479\n", | |
| " Asian / Asian British 11\n", | |
| " Black / African/ Caribbean/ Black British 2\n", | |
| " Arab 1\n", | |
| " Mixed / Multiple ethnic groups 1\n", | |
| "Name: ethnicity, dtype: int64\n", | |
| "case ethnicity \n", | |
| "0 White 0.96\n", | |
| " Asian / Asian British 0.02\n", | |
| " Black / African/ Caribbean/ Black British 0.02\n", | |
| " Other ethnic group 0.00\n", | |
| "1 White 0.97\n", | |
| " Asian / Asian British 0.02\n", | |
| " Black / African/ Caribbean/ Black British 0.00\n", | |
| " Arab 0.00\n", | |
| " Mixed / Multiple ethnic groups 0.00\n", | |
| "Name: ethnicity, dtype: float64\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(df.groupby('case').ethnicity.value_counts() )\n", | |
| "print(df.groupby('case').ethnicity.value_counts(normalize=True) )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "case median_ssec_int\n", | |
| "0 1 39\n", | |
| " 2 61\n", | |
| " 3 73\n", | |
| " 4 52\n", | |
| " 5 99\n", | |
| " 6 88\n", | |
| " 7 54\n", | |
| "1 1 37\n", | |
| " 2 59\n", | |
| " 3 71\n", | |
| " 4 55\n", | |
| " 5 93\n", | |
| " 6 113\n", | |
| " 7 66\n", | |
| "Name: median_ssec_int, dtype: int64\n", | |
| "case median_ssec_int\n", | |
| "0 1 0.08\n", | |
| " 2 0.13\n", | |
| " 3 0.16\n", | |
| " 4 0.11\n", | |
| " 5 0.21\n", | |
| " 6 0.19\n", | |
| " 7 0.12\n", | |
| "1 1 0.07\n", | |
| " 2 0.12\n", | |
| " 3 0.14\n", | |
| " 4 0.11\n", | |
| " 5 0.19\n", | |
| " 6 0.23\n", | |
| " 7 0.13\n", | |
| "Name: median_ssec_int, dtype: float64\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print (df.groupby('case')['median_ssec_int'].value_counts(sort=False))\n", | |
| "print (df.groupby('case')['median_ssec_int'].value_counts(sort=False,normalize=True))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "case current_smoker\n", | |
| "0 0 436\n", | |
| " 1 30\n", | |
| "1 0 484\n", | |
| " 1 10\n", | |
| "Name: current_smoker, dtype: int64\n", | |
| "case current_smoker\n", | |
| "0 0 0.94\n", | |
| " 1 0.06\n", | |
| "1 0 0.98\n", | |
| " 1 0.02\n", | |
| "Name: current_smoker, dtype: float64\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print (df.groupby('case')['current_smoker'].value_counts(sort=False))\n", | |
| "print (df.groupby('case')['current_smoker'].value_counts(sort=False,normalize=True))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "case ever_smoked\n", | |
| "0 0 139\n", | |
| " 1 327\n", | |
| "1 0 121\n", | |
| " 1 373\n", | |
| "Name: ever_smoked, dtype: int64\n", | |
| "case ever_smoked\n", | |
| "0 0 0.30\n", | |
| " 1 0.70\n", | |
| "1 0 0.24\n", | |
| " 1 0.76\n", | |
| "Name: ever_smoked, dtype: float64\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print (df.groupby('case')['ever_smoked'].value_counts(sort=False))\n", | |
| "print (df.groupby('case')['ever_smoked'].value_counts(sort=False, normalize=True))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>count</th>\n", | |
| " <th>mean</th>\n", | |
| " <th>std</th>\n", | |
| " <th>min</th>\n", | |
| " <th>25%</th>\n", | |
| " <th>50%</th>\n", | |
| " <th>75%</th>\n", | |
| " <th>max</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>case</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>301.00</td>\n", | |
| " <td>26.16</td>\n", | |
| " <td>24.94</td>\n", | |
| " <td>1.00</td>\n", | |
| " <td>9.00</td>\n", | |
| " <td>21.00</td>\n", | |
| " <td>36.00</td>\n", | |
| " <td>165.00</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>350.00</td>\n", | |
| " <td>27.60</td>\n", | |
| " <td>26.03</td>\n", | |
| " <td>1.00</td>\n", | |
| " <td>10.25</td>\n", | |
| " <td>21.00</td>\n", | |
| " <td>37.75</td>\n", | |
| " <td>220.00</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " count mean std min 25% 50% 75% max\n", | |
| "case \n", | |
| "0 301.00 26.16 24.94 1.00 9.00 21.00 36.00 165.00\n", | |
| "1 350.00 27.60 26.03 1.00 10.25 21.00 37.75 220.00" | |
| ] | |
| }, | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df[df['packyrs'] > 0].groupby('case')['packyrs'].describe()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "case mrc_score\n", | |
| "0 0.00 254\n", | |
| " 1.00 65\n", | |
| " 2.00 80\n", | |
| " 3.00 65\n", | |
| " 4.00 2\n", | |
| "1 0.00 35\n", | |
| " 1.00 94\n", | |
| " 2.00 165\n", | |
| " 3.00 172\n", | |
| " 4.00 28\n", | |
| "Name: mrc_score, dtype: int64\n", | |
| "case mrc_score\n", | |
| "0 0.00 0.55\n", | |
| " 1.00 0.14\n", | |
| " 2.00 0.17\n", | |
| " 3.00 0.14\n", | |
| " 4.00 0.00\n", | |
| "1 0.00 0.07\n", | |
| " 1.00 0.19\n", | |
| " 2.00 0.33\n", | |
| " 3.00 0.35\n", | |
| " 4.00 0.06\n", | |
| "Name: mrc_score, dtype: float64\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print (df.groupby('case')['mrc_score'].value_counts(sort=False))\n", | |
| "print (df.groupby('case')['mrc_score'].value_counts(sort=False, normalize=True))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# table two" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "case peto_exposed\n", | |
| "0 0 173\n", | |
| " 1 293\n", | |
| "1 0 166\n", | |
| " 1 328\n", | |
| "Name: peto_exposed, dtype: int64\n", | |
| "case peto_exposed\n", | |
| "0 0 0.37\n", | |
| " 1 0.63\n", | |
| "1 0 0.34\n", | |
| " 1 0.66\n", | |
| "Name: peto_exposed, dtype: float64\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(df.groupby('case').peto_exposed.value_counts(sort=False) )\n", | |
| "print(df.groupby('case').peto_exposed.value_counts(sort=False,normalize=True) )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 22, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "case lowest_peto_cat_reordered\n", | |
| "0 1 74\n", | |
| " 2 99\n", | |
| " 3 115\n", | |
| " 4 127\n", | |
| " 5 51\n", | |
| "1 1 72\n", | |
| " 2 94\n", | |
| " 3 124\n", | |
| " 4 140\n", | |
| " 5 64\n", | |
| "Name: lowest_peto_cat_reordered, dtype: int64\n", | |
| "case lowest_peto_cat_reordered\n", | |
| "0 1 0.16\n", | |
| " 2 0.21\n", | |
| " 3 0.25\n", | |
| " 4 0.27\n", | |
| " 5 0.11\n", | |
| "1 1 0.15\n", | |
| " 2 0.19\n", | |
| " 3 0.25\n", | |
| " 4 0.28\n", | |
| " 5 0.13\n", | |
| "Name: lowest_peto_cat_reordered, dtype: float64\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(df.groupby('case').lowest_peto_cat_reordered.value_counts(sort=False) )\n", | |
| "print(df.groupby('case').lowest_peto_cat_reordered.value_counts(normalize=True,sort=False) )" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 28, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0 0.80\n", | |
| "1 0.00\n", | |
| "2 0.00\n", | |
| "3 0.00\n", | |
| "4 0.00\n", | |
| " ... \n", | |
| "955 0.00\n", | |
| "956 0.00\n", | |
| "957 0.00\n", | |
| "958 12.42\n", | |
| "959 0.00\n", | |
| "Name: fibre_ml_exposure, Length: 960, dtype: float64" | |
| ] | |
| }, | |
| "execution_count": 28, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.fibre_ml_exposure" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 29, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| " count mean std min 25% 50% 75% max\n", | |
| "case \n", | |
| "0 108.00 617.34 3,078.48 0.00 0.22 4.37 57.36 23,785.71\n", | |
| "1 122.00 1,117.93 5,641.48 0.00 0.30 5.86 62.98 50,761.90\n", | |
| " count mean std min 25% 50% 75% max\n", | |
| "case \n", | |
| "0 24.00 2,743.04 6,166.46 105.21 216.02 466.14 1,182.85 23,785.71\n", | |
| "1 27.00 5,013.71 11,308.10 102.74 187.29 594.64 3,615.46 50,761.90\n", | |
| " count mean std min 25% 50% 75% max\n", | |
| "case \n", | |
| "0 29.00 2,281.78 5,682.68 52.86 144.00 290.83 1,126.29 23,785.71\n", | |
| "1 34.00 3,995.43 10,240.56 55.15 121.27 297.05 3,214.13 50,761.90\n", | |
| " count mean std min 25% 50% 75% max\n", | |
| "case \n", | |
| "0 35.00 1,896.92 5,227.91 27.12 66.32 222.00 1,067.43 23,785.71\n", | |
| "1 40.00 3,401.64 9,528.10 28.81 68.42 187.29 1,660.67 50,761.90\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(df[df.fibre_ml_exposure > 0].groupby('case').fibre_ml_exposure.describe())\n", | |
| "print(df[df.fibre_ml_exposure >= 100].groupby('case').fibre_ml_exposure.describe())\n", | |
| "print(df[df.fibre_ml_exposure >= 50].groupby('case').fibre_ml_exposure.describe())\n", | |
| "print(df[df.fibre_ml_exposure >= 25].groupby('case').fibre_ml_exposure.describe())\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# table three" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.692044\n", | |
| " Iterations 3\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 960\n", | |
| "Model: Logit Df Residuals: 958\n", | |
| "Method: MLE Df Model: 1\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.0009785\n", | |
| "Time: 18:34:48 Log-Likelihood: -664.36\n", | |
| "converged: True LL-Null: -665.01\n", | |
| "Covariance Type: nonrobust LLR p-value: 0.2540\n", | |
| "================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "--------------------------------------------------------------------------------\n", | |
| "Intercept -0.0413 0.109 -0.380 0.704 -0.254 0.172\n", | |
| "peto_exposed 0.1541 0.135 1.141 0.254 -0.111 0.419\n", | |
| "================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.78 1.19 0.96\n", | |
| "peto_exposed 0.90 1.52 1.17\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ peto_exposed', data=df) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 31, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.631284\n", | |
| " Iterations 7\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 960\n", | |
| "Model: Logit Df Residuals: 936\n", | |
| "Method: MLE Df Model: 23\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.08869\n", | |
| "Time: 18:34:50 Log-Likelihood: -606.03\n", | |
| "converged: True LL-Null: -665.01\n", | |
| "Covariance Type: nonrobust LLR p-value: 9.668e-15\n", | |
| "===================================================================================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "---------------------------------------------------------------------------------------------------------------------------------------------------\n", | |
| "Intercept -3.2245 0.786 -4.101 0.000 -4.766 -1.684\n", | |
| "C(centre_name)[T.Aintree University Hospitals NHS Foundation Trust] -0.2512 0.496 -0.507 0.612 -1.223 0.721\n", | |
| "C(centre_name)[T.Glasgow Royal Infirmary] 0.3597 0.894 0.402 0.688 -1.393 2.112\n", | |
| "C(centre_name)[T.Guys’ and St Thomas’ NHS Foundation Trust] -0.0053 0.516 -0.010 0.992 -1.018 1.007\n", | |
| "C(centre_name)[T.Heart of England NHS Foundation Trust] -0.1430 0.493 -0.290 0.772 -1.110 0.824\n", | |
| "C(centre_name)[T.Imperial College Healthcare NHS Trust] -1.4553 0.506 -2.874 0.004 -2.448 -0.463\n", | |
| "C(centre_name)[T.Leeds Teaching Hospitals NHS Trust] 2.7931 0.849 3.290 0.001 1.129 4.457\n", | |
| "C(centre_name)[T.Morriston Hospital] -0.0182 0.605 -0.030 0.976 -1.203 1.167\n", | |
| "C(centre_name)[T.Norfolk and Norwich University Hospitals NHS Foundation Trust] 1.1716 0.925 1.267 0.205 -0.641 2.985\n", | |
| "C(centre_name)[T.North Bristol NHS Trust] -0.2862 0.515 -0.556 0.578 -1.296 0.723\n", | |
| "C(centre_name)[T.Nottingham University Hospitals NHS Trust] -0.2194 0.500 -0.438 0.661 -1.200 0.761\n", | |
| "C(centre_name)[T.Papworth Hospital NHS Foundation Trust] -0.2108 0.501 -0.421 0.674 -1.192 0.770\n", | |
| "C(centre_name)[T.Portsmouth Hospitals NHS Trust] -0.4812 0.733 -0.656 0.512 -1.918 0.955\n", | |
| "C(centre_name)[T.Royal Devon and Exeter NHS Foundation Trust] -0.2196 0.574 -0.383 0.702 -1.345 0.905\n", | |
| "C(centre_name)[T.Royal Infirmary of Edinburgh] -0.3008 0.587 -0.512 0.608 -1.451 0.850\n", | |
| "C(centre_name)[T.Southampton University Hospitals NHS Trust] -0.2792 0.485 -0.576 0.565 -1.230 0.671\n", | |
| "C(centre_name)[T.Taunton and Somerset NHS Foundation Trust] -0.1191 0.579 -0.206 0.837 -1.253 1.015\n", | |
| "C(centre_name)[T.The Newcastle Upon Tyne Hospitals NHS Foundation Trust] -0.0031 0.555 -0.006 0.995 -1.091 1.085\n", | |
| "C(centre_name)[T.University Hospital of South Manchester] 0.1289 0.501 0.257 0.797 -0.853 1.111\n", | |
| "C(centre_name)[T.University Hospitals Birmingham NHS Foundation Trust] 2.7552 1.127 2.446 0.014 0.547 4.963\n", | |
| "C(centre_name)[T.Worcestershire Acute Hospitals NHS Trust] -0.6483 0.678 -0.956 0.339 -1.977 0.681\n", | |
| "age 0.0416 0.009 4.677 0.000 0.024 0.059\n", | |
| "ever_smoked 0.3310 0.158 2.097 0.036 0.022 0.640\n", | |
| "peto_exposed 0.0875 0.146 0.599 0.549 -0.199 0.374\n", | |
| "===================================================================================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.01 0.19 0.04\n", | |
| "C(centre_name)[T.Aintree University Hospitals N... 0.29 2.06 0.78\n", | |
| "C(centre_name)[T.Glasgow Royal Infirmary] 0.25 8.27 1.43\n", | |
| "C(centre_name)[T.Guys’ and St Thomas’ NHS Found... 0.36 2.74 0.99\n", | |
| "C(centre_name)[T.Heart of England NHS Foundatio... 0.33 2.28 0.87\n", | |
| "C(centre_name)[T.Imperial College Healthcare NH... 0.09 0.63 0.23\n", | |
| "C(centre_name)[T.Leeds Teaching Hospitals NHS T... 3.09 86.22 16.33\n", | |
| "C(centre_name)[T.Morriston Hospital] 0.30 3.21 0.98\n", | |
| "C(centre_name)[T.Norfolk and Norwich University... 0.53 19.78 3.23\n", | |
| "C(centre_name)[T.North Bristol NHS Trust] 0.27 2.06 0.75\n", | |
| "C(centre_name)[T.Nottingham University Hospital... 0.30 2.14 0.80\n", | |
| "C(centre_name)[T.Papworth Hospital NHS Foundati... 0.30 2.16 0.81\n", | |
| "C(centre_name)[T.Portsmouth Hospitals NHS Trust] 0.15 2.60 0.62\n", | |
| "C(centre_name)[T.Royal Devon and Exeter NHS Fou... 0.26 2.47 0.80\n", | |
| "C(centre_name)[T.Royal Infirmary of Edinburgh] 0.23 2.34 0.74\n", | |
| "C(centre_name)[T.Southampton University Hospita... 0.29 1.96 0.76\n", | |
| "C(centre_name)[T.Taunton and Somerset NHS Found... 0.29 2.76 0.89\n", | |
| "C(centre_name)[T.The Newcastle Upon Tyne Hospit... 0.34 2.96 1.00\n", | |
| "C(centre_name)[T.University Hospital of South M... 0.43 3.04 1.14\n", | |
| "C(centre_name)[T.University Hospitals Birmingha... 1.73 143.04 15.72\n", | |
| "C(centre_name)[T.Worcestershire Acute Hospitals... 0.14 1.98 0.52\n", | |
| "age 1.02 1.06 1.04\n", | |
| "ever_smoked 1.02 1.90 1.39\n", | |
| "peto_exposed 0.82 1.45 1.09\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ age + ever_smoked + C(centre_name) + peto_exposed', data=df) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 32, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.691792\n", | |
| " Iterations 4\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 960\n", | |
| "Model: Logit Df Residuals: 955\n", | |
| "Method: MLE Df Model: 4\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.001342\n", | |
| "Time: 18:35:11 Log-Likelihood: -664.12\n", | |
| "converged: True LL-Null: -665.01\n", | |
| "Covariance Type: nonrobust LLR p-value: 0.7752\n", | |
| "=====================================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "-----------------------------------------------------------------------------------------------------\n", | |
| "Intercept -0.0274 0.166 -0.166 0.869 -0.352 0.297\n", | |
| "C(lowest_peto_cat_reordered)[T.2] -0.0244 0.219 -0.111 0.911 -0.454 0.406\n", | |
| "C(lowest_peto_cat_reordered)[T.3] 0.1027 0.210 0.489 0.625 -0.309 0.515\n", | |
| "C(lowest_peto_cat_reordered)[T.4] 0.1249 0.206 0.606 0.544 -0.279 0.529\n", | |
| "C(lowest_peto_cat_reordered)[T.5] 0.2545 0.250 1.017 0.309 -0.236 0.745\n", | |
| "=====================================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.70 1.35 0.97\n", | |
| "C(lowest_peto_cat_reordered)[T.2] 0.63 1.50 0.98\n", | |
| "C(lowest_peto_cat_reordered)[T.3] 0.73 1.67 1.11\n", | |
| "C(lowest_peto_cat_reordered)[T.4] 0.76 1.70 1.13\n", | |
| "C(lowest_peto_cat_reordered)[T.5] 0.79 2.11 1.29\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ C(lowest_peto_cat_reordered)', data=df) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 33, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.630565\n", | |
| " Iterations 7\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 960\n", | |
| "Model: Logit Df Residuals: 933\n", | |
| "Method: MLE Df Model: 26\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.08973\n", | |
| "Time: 18:35:59 Log-Likelihood: -605.34\n", | |
| "converged: True LL-Null: -665.01\n", | |
| "Covariance Type: nonrobust LLR p-value: 6.451e-14\n", | |
| "===================================================================================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "---------------------------------------------------------------------------------------------------------------------------------------------------\n", | |
| "Intercept -3.1901 0.798 -3.996 0.000 -4.755 -1.625\n", | |
| "C(centre_name)[T.Aintree University Hospitals NHS Foundation Trust] -0.2911 0.498 -0.584 0.559 -1.267 0.685\n", | |
| "C(centre_name)[T.Glasgow Royal Infirmary] 0.3443 0.894 0.385 0.700 -1.407 2.096\n", | |
| "C(centre_name)[T.Guys’ and St Thomas’ NHS Foundation Trust] -0.0436 0.518 -0.084 0.933 -1.059 0.972\n", | |
| "C(centre_name)[T.Heart of England NHS Foundation Trust] -0.1706 0.495 -0.345 0.730 -1.141 0.800\n", | |
| "C(centre_name)[T.Imperial College Healthcare NHS Trust] -1.5025 0.509 -2.951 0.003 -2.500 -0.505\n", | |
| "C(centre_name)[T.Leeds Teaching Hospitals NHS Trust] 2.7754 0.849 3.268 0.001 1.111 4.440\n", | |
| "C(centre_name)[T.Morriston Hospital] -0.0212 0.605 -0.035 0.972 -1.207 1.165\n", | |
| "C(centre_name)[T.Norfolk and Norwich University Hospitals NHS Foundation Trust] 1.1798 0.929 1.270 0.204 -0.641 3.000\n", | |
| "C(centre_name)[T.North Bristol NHS Trust] -0.3192 0.516 -0.618 0.536 -1.331 0.693\n", | |
| "C(centre_name)[T.Nottingham University Hospitals NHS Trust] -0.2394 0.502 -0.477 0.633 -1.223 0.745\n", | |
| "C(centre_name)[T.Papworth Hospital NHS Foundation Trust] -0.2279 0.502 -0.454 0.650 -1.211 0.755\n", | |
| "C(centre_name)[T.Portsmouth Hospitals NHS Trust] -0.5405 0.735 -0.735 0.462 -1.981 0.900\n", | |
| "C(centre_name)[T.Royal Devon and Exeter NHS Foundation Trust] -0.2412 0.576 -0.419 0.675 -1.370 0.887\n", | |
| "C(centre_name)[T.Royal Infirmary of Edinburgh] -0.3293 0.588 -0.560 0.576 -1.483 0.824\n", | |
| "C(centre_name)[T.Southampton University Hospitals NHS Trust] -0.2999 0.486 -0.617 0.537 -1.253 0.653\n", | |
| "C(centre_name)[T.Taunton and Somerset NHS Foundation Trust] -0.1447 0.580 -0.250 0.803 -1.281 0.992\n", | |
| "C(centre_name)[T.The Newcastle Upon Tyne Hospitals NHS Foundation Trust] -0.0439 0.557 -0.079 0.937 -1.135 1.047\n", | |
| "C(centre_name)[T.University Hospital of South Manchester] 0.0894 0.503 0.178 0.859 -0.897 1.076\n", | |
| "C(centre_name)[T.University Hospitals Birmingham NHS Foundation Trust] 2.7657 1.127 2.453 0.014 0.556 4.975\n", | |
| "C(centre_name)[T.Worcestershire Acute Hospitals NHS Trust] -0.6496 0.680 -0.955 0.340 -1.983 0.683\n", | |
| "C(lowest_peto_cat_reordered)[T.2] -0.1935 0.237 -0.816 0.415 -0.658 0.271\n", | |
| "C(lowest_peto_cat_reordered)[T.3] -0.0767 0.228 -0.336 0.737 -0.524 0.371\n", | |
| "C(lowest_peto_cat_reordered)[T.4] 0.0603 0.223 0.270 0.787 -0.377 0.498\n", | |
| "C(lowest_peto_cat_reordered)[T.5] -0.1063 0.271 -0.393 0.694 -0.637 0.424\n", | |
| "age 0.0430 0.009 4.758 0.000 0.025 0.061\n", | |
| "ever_smoked 0.3333 0.158 2.104 0.035 0.023 0.644\n", | |
| "===================================================================================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.01 0.20 0.04\n", | |
| "C(centre_name)[T.Aintree University Hospitals N... 0.28 1.98 0.75\n", | |
| "C(centre_name)[T.Glasgow Royal Infirmary] 0.24 8.13 1.41\n", | |
| "C(centre_name)[T.Guys’ and St Thomas’ NHS Found... 0.35 2.64 0.96\n", | |
| "C(centre_name)[T.Heart of England NHS Foundatio... 0.32 2.23 0.84\n", | |
| "C(centre_name)[T.Imperial College Healthcare NH... 0.08 0.60 0.22\n", | |
| "C(centre_name)[T.Leeds Teaching Hospitals NHS T... 3.04 84.75 16.05\n", | |
| "C(centre_name)[T.Morriston Hospital] 0.30 3.21 0.98\n", | |
| "C(centre_name)[T.Norfolk and Norwich University... 0.53 20.09 3.25\n", | |
| "C(centre_name)[T.North Bristol NHS Trust] 0.26 2.00 0.73\n", | |
| "C(centre_name)[T.Nottingham University Hospital... 0.29 2.11 0.79\n", | |
| "C(centre_name)[T.Papworth Hospital NHS Foundati... 0.30 2.13 0.80\n", | |
| "C(centre_name)[T.Portsmouth Hospitals NHS Trust] 0.14 2.46 0.58\n", | |
| "C(centre_name)[T.Royal Devon and Exeter NHS Fou... 0.25 2.43 0.79\n", | |
| "C(centre_name)[T.Royal Infirmary of Edinburgh] 0.23 2.28 0.72\n", | |
| "C(centre_name)[T.Southampton University Hospita... 0.29 1.92 0.74\n", | |
| "C(centre_name)[T.Taunton and Somerset NHS Found... 0.28 2.70 0.87\n", | |
| "C(centre_name)[T.The Newcastle Upon Tyne Hospit... 0.32 2.85 0.96\n", | |
| "C(centre_name)[T.University Hospital of South M... 0.41 2.93 1.09\n", | |
| "C(centre_name)[T.University Hospitals Birmingha... 1.74 144.78 15.89\n", | |
| "C(centre_name)[T.Worcestershire Acute Hospitals... 0.14 1.98 0.52\n", | |
| "C(lowest_peto_cat_reordered)[T.2] 0.52 1.31 0.82\n", | |
| "C(lowest_peto_cat_reordered)[T.3] 0.59 1.45 0.93\n", | |
| "C(lowest_peto_cat_reordered)[T.4] 0.69 1.64 1.06\n", | |
| "C(lowest_peto_cat_reordered)[T.5] 0.53 1.53 0.90\n", | |
| "age 1.03 1.06 1.04\n", | |
| "ever_smoked 1.02 1.90 1.40\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ age + ever_smoked + C(centre_name) + C(lowest_peto_cat_reordered)', data=df) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.690922\n", | |
| " Iterations 3\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 960\n", | |
| "Model: Logit Df Residuals: 958\n", | |
| "Method: MLE Df Model: 1\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.002598\n", | |
| "Time: 18:42:04 Log-Likelihood: -663.29\n", | |
| "converged: True LL-Null: -665.01\n", | |
| "Covariance Type: nonrobust LLR p-value: 0.06305\n", | |
| "===============================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "-------------------------------------------------------------------------------\n", | |
| "Intercept -0.1387 0.124 -1.115 0.265 -0.382 0.105\n", | |
| "ever_smoked 0.2703 0.146 1.857 0.063 -0.015 0.556\n", | |
| "===============================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.68 1.11 0.87\n", | |
| "ever_smoked 0.99 1.74 1.31\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ ever_smoked', data=df) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.631471\n", | |
| " Iterations 7\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 960\n", | |
| "Model: Logit Df Residuals: 937\n", | |
| "Method: MLE Df Model: 22\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.08842\n", | |
| "Time: 18:42:05 Log-Likelihood: -606.21\n", | |
| "converged: True LL-Null: -665.01\n", | |
| "Covariance Type: nonrobust LLR p-value: 4.746e-15\n", | |
| "===================================================================================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "---------------------------------------------------------------------------------------------------------------------------------------------------\n", | |
| "Intercept -3.1662 0.779 -4.065 0.000 -4.693 -1.640\n", | |
| "C(centre_name)[T.Aintree University Hospitals NHS Foundation Trust] -0.2398 0.495 -0.485 0.628 -1.210 0.730\n", | |
| "C(centre_name)[T.Glasgow Royal Infirmary] 0.3385 0.893 0.379 0.705 -1.413 2.090\n", | |
| "C(centre_name)[T.Guys’ and St Thomas’ NHS Foundation Trust] -0.0125 0.516 -0.024 0.981 -1.023 0.998\n", | |
| "C(centre_name)[T.Heart of England NHS Foundation Trust] -0.1352 0.493 -0.274 0.784 -1.101 0.830\n", | |
| "C(centre_name)[T.Imperial College Healthcare NHS Trust] -1.4626 0.506 -2.893 0.004 -2.453 -0.472\n", | |
| "C(centre_name)[T.Leeds Teaching Hospitals NHS Trust] 2.7912 0.848 3.290 0.001 1.128 4.454\n", | |
| "C(centre_name)[T.Morriston Hospital] -0.0092 0.604 -0.015 0.988 -1.193 1.174\n", | |
| "C(centre_name)[T.Norfolk and Norwich University Hospitals NHS Foundation Trust] 1.2009 0.923 1.301 0.193 -0.609 3.011\n", | |
| "C(centre_name)[T.North Bristol NHS Trust] -0.2879 0.514 -0.560 0.576 -1.296 0.720\n", | |
| "C(centre_name)[T.Nottingham University Hospitals NHS Trust] -0.2179 0.500 -0.436 0.663 -1.197 0.761\n", | |
| "C(centre_name)[T.Papworth Hospital NHS Foundation Trust] -0.2182 0.500 -0.436 0.663 -1.198 0.762\n", | |
| "C(centre_name)[T.Portsmouth Hospitals NHS Trust] -0.4710 0.733 -0.643 0.520 -1.907 0.965\n", | |
| "C(centre_name)[T.Royal Devon and Exeter NHS Foundation Trust] -0.2145 0.574 -0.374 0.708 -1.339 0.910\n", | |
| "C(centre_name)[T.Royal Infirmary of Edinburgh] -0.2962 0.587 -0.505 0.614 -1.446 0.854\n", | |
| "C(centre_name)[T.Southampton University Hospitals NHS Trust] -0.2776 0.484 -0.573 0.566 -1.227 0.672\n", | |
| "C(centre_name)[T.Taunton and Somerset NHS Foundation Trust] -0.1170 0.578 -0.202 0.840 -1.250 1.016\n", | |
| "C(centre_name)[T.The Newcastle Upon Tyne Hospitals NHS Foundation Trust] 0.0018 0.554 0.003 0.997 -1.085 1.088\n", | |
| "C(centre_name)[T.University Hospital of South Manchester] 0.1253 0.501 0.250 0.802 -0.856 1.106\n", | |
| "C(centre_name)[T.University Hospitals Birmingham NHS Foundation Trust] 2.7560 1.126 2.447 0.014 0.549 4.963\n", | |
| "C(centre_name)[T.Worcestershire Acute Hospitals NHS Trust] -0.6500 0.678 -0.959 0.337 -1.978 0.678\n", | |
| "age 0.0415 0.009 4.668 0.000 0.024 0.059\n", | |
| "ever_smoked 0.3403 0.157 2.166 0.030 0.032 0.648\n", | |
| "===================================================================================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.01 0.19 0.04\n", | |
| "C(centre_name)[T.Aintree University Hospitals N... 0.30 2.08 0.79\n", | |
| "C(centre_name)[T.Glasgow Royal Infirmary] 0.24 8.08 1.40\n", | |
| "C(centre_name)[T.Guys’ and St Thomas’ NHS Found... 0.36 2.71 0.99\n", | |
| "C(centre_name)[T.Heart of England NHS Foundatio... 0.33 2.29 0.87\n", | |
| "C(centre_name)[T.Imperial College Healthcare NH... 0.09 0.62 0.23\n", | |
| "C(centre_name)[T.Leeds Teaching Hospitals NHS T... 3.09 85.98 16.30\n", | |
| "C(centre_name)[T.Morriston Hospital] 0.30 3.24 0.99\n", | |
| "C(centre_name)[T.Norfolk and Norwich University... 0.54 20.30 3.32\n", | |
| "C(centre_name)[T.North Bristol NHS Trust] 0.27 2.05 0.75\n", | |
| "C(centre_name)[T.Nottingham University Hospital... 0.30 2.14 0.80\n", | |
| "C(centre_name)[T.Papworth Hospital NHS Foundati... 0.30 2.14 0.80\n", | |
| "C(centre_name)[T.Portsmouth Hospitals NHS Trust] 0.15 2.62 0.62\n", | |
| "C(centre_name)[T.Royal Devon and Exeter NHS Fou... 0.26 2.48 0.81\n", | |
| "C(centre_name)[T.Royal Infirmary of Edinburgh] 0.24 2.35 0.74\n", | |
| "C(centre_name)[T.Southampton University Hospita... 0.29 1.96 0.76\n", | |
| "C(centre_name)[T.Taunton and Somerset NHS Found... 0.29 2.76 0.89\n", | |
| "C(centre_name)[T.The Newcastle Upon Tyne Hospit... 0.34 2.97 1.00\n", | |
| "C(centre_name)[T.University Hospital of South M... 0.43 3.02 1.13\n", | |
| "C(centre_name)[T.University Hospitals Birmingha... 1.73 143.07 15.74\n", | |
| "C(centre_name)[T.Worcestershire Acute Hospitals... 0.14 1.97 0.52\n", | |
| "age 1.02 1.06 1.04\n", | |
| "ever_smoked 1.03 1.91 1.41\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ age + ever_smoked + C(centre_name)', data=df) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 36, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.688784\n", | |
| " Iterations 4\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 960\n", | |
| "Model: Logit Df Residuals: 956\n", | |
| "Method: MLE Df Model: 3\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.005684\n", | |
| "Time: 18:42:07 Log-Likelihood: -661.23\n", | |
| "converged: True LL-Null: -665.01\n", | |
| "Covariance Type: nonrobust LLR p-value: 0.05604\n", | |
| "============================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "--------------------------------------------------------------------------------------------\n", | |
| "Intercept 2.236e-16 0.186 1.2e-15 1.000 -0.364 0.364\n", | |
| "ever_smoked -0.0628 0.229 -0.274 0.784 -0.512 0.386\n", | |
| "peto_exposed -0.2513 0.250 -1.004 0.316 -0.742 0.239\n", | |
| "ever_smoked:peto_exposed 0.5373 0.299 1.799 0.072 -0.048 1.122\n", | |
| "============================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.69 1.44 1.00\n", | |
| "ever_smoked 0.60 1.47 0.94\n", | |
| "peto_exposed 0.48 1.27 0.78\n", | |
| "ever_smoked:peto_exposed 0.95 3.07 1.71\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ ever_smoked*peto_exposed', data=df) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 37, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.629996\n", | |
| " Iterations 7\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 960\n", | |
| "Model: Logit Df Residuals: 935\n", | |
| "Method: MLE Df Model: 24\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.09055\n", | |
| "Time: 18:42:09 Log-Likelihood: -604.80\n", | |
| "converged: True LL-Null: -665.01\n", | |
| "Covariance Type: nonrobust LLR p-value: 8.124e-15\n", | |
| "===================================================================================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "---------------------------------------------------------------------------------------------------------------------------------------------------\n", | |
| "Intercept -3.0969 0.789 -3.928 0.000 -4.642 -1.551\n", | |
| "C(centre_name)[T.Aintree University Hospitals NHS Foundation Trust] -0.2009 0.496 -0.405 0.686 -1.174 0.772\n", | |
| "C(centre_name)[T.Glasgow Royal Infirmary] 0.4814 0.901 0.534 0.593 -1.284 2.247\n", | |
| "C(centre_name)[T.Guys’ and St Thomas’ NHS Foundation Trust] 0.0454 0.517 0.088 0.930 -0.967 1.058\n", | |
| "C(centre_name)[T.Heart of England NHS Foundation Trust] -0.0932 0.494 -0.189 0.850 -1.061 0.874\n", | |
| "C(centre_name)[T.Imperial College Healthcare NHS Trust] -1.4030 0.506 -2.772 0.006 -2.395 -0.411\n", | |
| "C(centre_name)[T.Leeds Teaching Hospitals NHS Trust] 2.8188 0.848 3.322 0.001 1.156 4.482\n", | |
| "C(centre_name)[T.Morriston Hospital] 0.0153 0.604 0.025 0.980 -1.168 1.198\n", | |
| "C(centre_name)[T.Norfolk and Norwich University Hospitals NHS Foundation Trust] 1.1958 0.924 1.294 0.196 -0.615 3.007\n", | |
| "C(centre_name)[T.North Bristol NHS Trust] -0.2484 0.515 -0.483 0.629 -1.257 0.760\n", | |
| "C(centre_name)[T.Nottingham University Hospitals NHS Trust] -0.1411 0.502 -0.281 0.779 -1.125 0.843\n", | |
| "C(centre_name)[T.Papworth Hospital NHS Foundation Trust] -0.1551 0.501 -0.310 0.757 -1.137 0.827\n", | |
| "C(centre_name)[T.Portsmouth Hospitals NHS Trust] -0.3924 0.736 -0.533 0.594 -1.834 1.049\n", | |
| "C(centre_name)[T.Royal Devon and Exeter NHS Foundation Trust] -0.1423 0.575 -0.247 0.805 -1.270 0.985\n", | |
| "C(centre_name)[T.Royal Infirmary of Edinburgh] -0.2344 0.588 -0.399 0.690 -1.387 0.918\n", | |
| "C(centre_name)[T.Southampton University Hospitals NHS Trust] -0.2419 0.484 -0.499 0.617 -1.191 0.707\n", | |
| "C(centre_name)[T.Taunton and Somerset NHS Foundation Trust] -0.0710 0.578 -0.123 0.902 -1.204 1.062\n", | |
| "C(centre_name)[T.The Newcastle Upon Tyne Hospitals NHS Foundation Trust] 0.0117 0.554 0.021 0.983 -1.074 1.098\n", | |
| "C(centre_name)[T.University Hospital of South Manchester] 0.1869 0.502 0.373 0.709 -0.796 1.170\n", | |
| "C(centre_name)[T.University Hospitals Birmingham NHS Foundation Trust] 2.8145 1.128 2.495 0.013 0.604 5.025\n", | |
| "C(centre_name)[T.Worcestershire Acute Hospitals NHS Trust] -0.5971 0.678 -0.881 0.378 -1.926 0.732\n", | |
| "age 0.0419 0.009 4.708 0.000 0.024 0.059\n", | |
| "ever_smoked 0.0299 0.248 0.121 0.904 -0.456 0.516\n", | |
| "peto_exposed -0.2690 0.270 -0.996 0.319 -0.798 0.260\n", | |
| "ever_smoked:peto_exposed 0.5055 0.322 1.571 0.116 -0.125 1.136\n", | |
| "===================================================================================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.01 0.21 0.05\n", | |
| "C(centre_name)[T.Aintree University Hospitals N... 0.31 2.16 0.82\n", | |
| "C(centre_name)[T.Glasgow Royal Infirmary] 0.28 9.46 1.62\n", | |
| "C(centre_name)[T.Guys’ and St Thomas’ NHS Found... 0.38 2.88 1.05\n", | |
| "C(centre_name)[T.Heart of England NHS Foundatio... 0.35 2.40 0.91\n", | |
| "C(centre_name)[T.Imperial College Healthcare NH... 0.09 0.66 0.25\n", | |
| "C(centre_name)[T.Leeds Teaching Hospitals NHS T... 3.18 88.39 16.76\n", | |
| "C(centre_name)[T.Morriston Hospital] 0.31 3.31 1.02\n", | |
| "C(centre_name)[T.Norfolk and Norwich University... 0.54 20.22 3.31\n", | |
| "C(centre_name)[T.North Bristol NHS Trust] 0.28 2.14 0.78\n", | |
| "C(centre_name)[T.Nottingham University Hospital... 0.32 2.32 0.87\n", | |
| "C(centre_name)[T.Papworth Hospital NHS Foundati... 0.32 2.29 0.86\n", | |
| "C(centre_name)[T.Portsmouth Hospitals NHS Trust] 0.16 2.86 0.68\n", | |
| "C(centre_name)[T.Royal Devon and Exeter NHS Fou... 0.28 2.68 0.87\n", | |
| "C(centre_name)[T.Royal Infirmary of Edinburgh] 0.25 2.50 0.79\n", | |
| "C(centre_name)[T.Southampton University Hospita... 0.30 2.03 0.79\n", | |
| "C(centre_name)[T.Taunton and Somerset NHS Found... 0.30 2.89 0.93\n", | |
| "C(centre_name)[T.The Newcastle Upon Tyne Hospit... 0.34 3.00 1.01\n", | |
| "C(centre_name)[T.University Hospital of South M... 0.45 3.22 1.21\n", | |
| "C(centre_name)[T.University Hospitals Birmingha... 1.83 152.17 16.68\n", | |
| "C(centre_name)[T.Worcestershire Acute Hospitals... 0.15 2.08 0.55\n", | |
| "age 1.02 1.06 1.04\n", | |
| "ever_smoked 0.63 1.67 1.03\n", | |
| "peto_exposed 0.45 1.30 0.76\n", | |
| "ever_smoked:peto_exposed 0.88 3.11 1.66\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ age + ever_smoked*peto_exposed + C(centre_name)', data=df) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# table four" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 38, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/home/drcjar/anaconda3/envs/ipfjes/lib/python3.5/site-packages/pandas/core/indexing.py:376: SettingWithCopyWarning: \n", | |
| "A value is trying to be set on a copy of a slice from a DataFrame.\n", | |
| "Try using .loc[row_indexer,col_indexer] = value instead\n", | |
| "\n", | |
| "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", | |
| " self.obj[key] = _infer_fill_value(value)\n", | |
| "/home/drcjar/anaconda3/envs/ipfjes/lib/python3.5/site-packages/pandas/core/indexing.py:494: SettingWithCopyWarning: \n", | |
| "A value is trying to be set on a copy of a slice from a DataFrame.\n", | |
| "Try using .loc[row_indexer,col_indexer] = value instead\n", | |
| "\n", | |
| "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", | |
| " self.obj[item] = s\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "gt = pd.read_csv('genotyping_cleaned.csv')\n", | |
| "\n", | |
| "transforms = {'Heterozygous':'(G;T)', 'Homozygous Allele 1':'(G;G)', 'Homozygous Allele 2':'(T;T)',\n", | |
| " 'Homozygous Allele 1 ':'(G;G)', 'Homoxygous Allele 1':'(G;G)',\n", | |
| " ' Homozygous Allele 2':'(T;T)', ' Homozygous Allele 1':'(G;G)'}\n", | |
| "gt.genotype = gt.genotype.map(transforms)\n", | |
| "\n", | |
| "transforms = {\"(G;G)\":0, \"(G;T)\":1, \"(T;T)\":2}\n", | |
| "\n", | |
| "gt.genotype = gt.genotype.map(transforms) # change genotype to 0,1,2 (assumes additive model)\n", | |
| "\n", | |
| "# add genotype\n", | |
| "genotype_lookup = gt[['participant_id', 'genotype']]\n", | |
| "genotype_lookup.index = genotype_lookup['participant_id']\n", | |
| "genotype_lookup = genotype_lookup['genotype'].to_dict()\n", | |
| "\n", | |
| "df['genotype'] = df.participant_id.map(genotype_lookup)\n", | |
| "\n", | |
| "df = df[df['genotype'].notnull()] # restrict analysis to participants with exposure data and genotype\n", | |
| "\n", | |
| "df.loc[:,'minor_allele'] = df['genotype'] > 0\n", | |
| "\n", | |
| "df.loc[:,'minor_allele'] = df.loc[:,'minor_allele'].astype(int)\n", | |
| "\n", | |
| "df.to_csv('flat_data_genotype_subset.csv', index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 39, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.688106\n", | |
| " Iterations 4\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 818\n", | |
| "Model: Logit Df Residuals: 814\n", | |
| "Method: MLE Df Model: 3\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.006433\n", | |
| "Time: 18:50:00 Log-Likelihood: -562.87\n", | |
| "converged: True LL-Null: -566.52\n", | |
| "Covariance Type: nonrobust LLR p-value: 0.06325\n", | |
| "============================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "--------------------------------------------------------------------------------------------\n", | |
| "Intercept -0.2091 0.205 -1.019 0.308 -0.611 0.193\n", | |
| "ever_smoked 0.0483 0.252 0.192 0.848 -0.446 0.543\n", | |
| "peto_exposed -0.1773 0.272 -0.653 0.514 -0.710 0.355\n", | |
| "ever_smoked:peto_exposed 0.4471 0.324 1.378 0.168 -0.189 1.083\n", | |
| "============================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.54 1.21 0.81\n", | |
| "ever_smoked 0.64 1.72 1.05\n", | |
| "peto_exposed 0.49 1.43 0.84\n", | |
| "ever_smoked:peto_exposed 0.83 2.95 1.56\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ ever_smoked*peto_exposed', data=df) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 40, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.681665\n", | |
| " Iterations 4\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 818\n", | |
| "Model: Logit Df Residuals: 813\n", | |
| "Method: MLE Df Model: 4\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.01573\n", | |
| "Time: 18:50:01 Log-Likelihood: -557.60\n", | |
| "converged: True LL-Null: -566.52\n", | |
| "Covariance Type: nonrobust LLR p-value: 0.001335\n", | |
| "============================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "--------------------------------------------------------------------------------------------\n", | |
| "Intercept -2.3285 0.694 -3.356 0.001 -3.689 -0.969\n", | |
| "age 0.0279 0.009 3.206 0.001 0.011 0.045\n", | |
| "ever_smoked 0.0716 0.254 0.281 0.778 -0.427 0.570\n", | |
| "peto_exposed -0.1958 0.274 -0.715 0.474 -0.732 0.341\n", | |
| "ever_smoked:peto_exposed 0.4725 0.327 1.446 0.148 -0.168 1.113\n", | |
| "============================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.03 0.38 0.10\n", | |
| "age 1.01 1.05 1.03\n", | |
| "ever_smoked 0.65 1.77 1.07\n", | |
| "peto_exposed 0.48 1.41 0.82\n", | |
| "ever_smoked:peto_exposed 0.85 3.04 1.60\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ age + ever_smoked*peto_exposed', data=df) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 41, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.608986\n", | |
| " Iterations 5\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 818\n", | |
| "Model: Logit Df Residuals: 814\n", | |
| "Method: MLE Df Model: 3\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.1207\n", | |
| "Time: 18:50:02 Log-Likelihood: -498.15\n", | |
| "converged: True LL-Null: -566.52\n", | |
| "Covariance Type: nonrobust LLR p-value: 1.918e-29\n", | |
| "=========================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "-----------------------------------------------------------------------------------------\n", | |
| "Intercept -0.7736 0.166 -4.663 0.000 -1.099 -0.448\n", | |
| "peto_exposed 0.0394 0.204 0.193 0.847 -0.361 0.440\n", | |
| "genotype 1.3115 0.239 5.480 0.000 0.842 1.781\n", | |
| "peto_exposed:genotype 0.3604 0.302 1.194 0.233 -0.231 0.952\n", | |
| "=========================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.33 0.64 0.46\n", | |
| "peto_exposed 0.70 1.55 1.04\n", | |
| "genotype 2.32 5.93 3.71\n", | |
| "peto_exposed:genotype 0.79 2.59 1.43\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ peto_exposed*genotype', data=df) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 42, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.601830\n", | |
| " Iterations 5\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 818\n", | |
| "Model: Logit Df Residuals: 812\n", | |
| "Method: MLE Df Model: 5\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.1310\n", | |
| "Time: 18:50:02 Log-Likelihood: -492.30\n", | |
| "converged: True LL-Null: -566.52\n", | |
| "Covariance Type: nonrobust LLR p-value: 2.872e-30\n", | |
| "=========================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "-----------------------------------------------------------------------------------------\n", | |
| "Intercept -3.1222 0.753 -4.145 0.000 -4.598 -1.646\n", | |
| "age 0.0285 0.009 3.014 0.003 0.010 0.047\n", | |
| "ever_smoked 0.3028 0.173 1.749 0.080 -0.037 0.642\n", | |
| "peto_exposed 0.0108 0.206 0.053 0.958 -0.393 0.415\n", | |
| "genotype 1.3058 0.242 5.399 0.000 0.832 1.780\n", | |
| "peto_exposed:genotype 0.3649 0.305 1.198 0.231 -0.232 0.962\n", | |
| "=========================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.01 0.19 0.04\n", | |
| "age 1.01 1.05 1.03\n", | |
| "ever_smoked 0.96 1.90 1.35\n", | |
| "peto_exposed 0.67 1.51 1.01\n", | |
| "genotype 2.30 5.93 3.69\n", | |
| "peto_exposed:genotype 0.79 2.62 1.44\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ age + ever_smoked + peto_exposed*genotype', data=df) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 43, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.623366\n", | |
| " Iterations 5\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 479\n", | |
| "Model: Logit Df Residuals: 475\n", | |
| "Method: MLE Df Model: 3\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.002356\n", | |
| "Time: 18:50:03 Log-Likelihood: -298.59\n", | |
| "converged: True LL-Null: -299.30\n", | |
| "Covariance Type: nonrobust LLR p-value: 0.7031\n", | |
| "============================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "--------------------------------------------------------------------------------------------\n", | |
| "Intercept -1.0531 0.300 -3.512 0.000 -1.641 -0.465\n", | |
| "ever_smoked 0.3882 0.363 1.068 0.285 -0.324 1.100\n", | |
| "peto_exposed 0.2116 0.384 0.552 0.581 -0.540 0.963\n", | |
| "ever_smoked:peto_exposed -0.2657 0.457 -0.582 0.561 -1.161 0.630\n", | |
| "============================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.19 0.63 0.35\n", | |
| "ever_smoked 0.72 3.00 1.47\n", | |
| "peto_exposed 0.58 2.62 1.24\n", | |
| "ever_smoked:peto_exposed 0.31 1.88 0.77\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ ever_smoked*peto_exposed', data=df[df['genotype'] == 0]) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 44, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.620634\n", | |
| " Iterations 5\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 479\n", | |
| "Model: Logit Df Residuals: 474\n", | |
| "Method: MLE Df Model: 4\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.006730\n", | |
| "Time: 18:50:03 Log-Likelihood: -297.28\n", | |
| "converged: True LL-Null: -299.30\n", | |
| "Covariance Type: nonrobust LLR p-value: 0.4022\n", | |
| "============================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "--------------------------------------------------------------------------------------------\n", | |
| "Intercept -2.4977 0.954 -2.619 0.009 -4.367 -0.628\n", | |
| "age 0.0193 0.012 1.604 0.109 -0.004 0.043\n", | |
| "ever_smoked 0.3719 0.364 1.021 0.307 -0.342 1.086\n", | |
| "peto_exposed 0.1741 0.385 0.452 0.651 -0.581 0.929\n", | |
| "ever_smoked:peto_exposed -0.2138 0.459 -0.466 0.641 -1.114 0.686\n", | |
| "============================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.01 0.53 0.08\n", | |
| "age 1.00 1.04 1.02\n", | |
| "ever_smoked 0.71 2.96 1.45\n", | |
| "peto_exposed 0.56 2.53 1.19\n", | |
| "ever_smoked:peto_exposed 0.33 1.99 0.81\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ age + ever_smoked*peto_exposed', data=df[df['genotype'] == 0]) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 45, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.576083\n", | |
| " Iterations 5\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 339\n", | |
| "Model: Logit Df Residuals: 335\n", | |
| "Method: MLE Df Model: 3\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.03332\n", | |
| "Time: 18:50:04 Log-Likelihood: -195.29\n", | |
| "converged: True LL-Null: -202.02\n", | |
| "Covariance Type: nonrobust LLR p-value: 0.003738\n", | |
| "============================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "--------------------------------------------------------------------------------------------\n", | |
| "Intercept 1.0296 0.368 2.795 0.005 0.308 1.752\n", | |
| "ever_smoked -0.5516 0.434 -1.272 0.203 -1.401 0.298\n", | |
| "peto_exposed -0.6931 0.471 -1.473 0.141 -1.615 0.229\n", | |
| "ever_smoked:peto_exposed 1.5797 0.556 2.839 0.005 0.489 2.670\n", | |
| "============================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 1.36 5.76 2.80\n", | |
| "ever_smoked 0.25 1.35 0.58\n", | |
| "peto_exposed 0.20 1.26 0.50\n", | |
| "ever_smoked:peto_exposed 1.63 14.44 4.85\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ ever_smoked*peto_exposed', data=df[df['genotype'] > 0]) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 46, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.565499\n", | |
| " Iterations 5\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 339\n", | |
| "Model: Logit Df Residuals: 334\n", | |
| "Method: MLE Df Model: 4\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.05108\n", | |
| "Time: 18:50:04 Log-Likelihood: -191.70\n", | |
| "converged: True LL-Null: -202.02\n", | |
| "Covariance Type: nonrobust LLR p-value: 0.0003737\n", | |
| "============================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "--------------------------------------------------------------------------------------------\n", | |
| "Intercept -2.1013 1.239 -1.696 0.090 -4.529 0.327\n", | |
| "age 0.0405 0.015 2.628 0.009 0.010 0.071\n", | |
| "ever_smoked -0.4274 0.441 -0.969 0.333 -1.292 0.437\n", | |
| "peto_exposed -0.6464 0.477 -1.354 0.176 -1.582 0.289\n", | |
| "ever_smoked:peto_exposed 1.5147 0.564 2.686 0.007 0.409 2.620\n", | |
| "============================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.01 1.39 0.12\n", | |
| "age 1.01 1.07 1.04\n", | |
| "ever_smoked 0.27 1.55 0.65\n", | |
| "peto_exposed 0.21 1.34 0.52\n", | |
| "ever_smoked:peto_exposed 1.51 13.73 4.55\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ age + ever_smoked*peto_exposed', data=df[df['genotype'] > 0]) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 47, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Index(['ipfjes_id', 'case', 'dob', 'age', 'yob', 'agegroup', 'ethnicity',\n", | |
| " 'ever_smoked', 'current_smoker', 'packyrs', 'participant_id', 'centre',\n", | |
| " 'gp_coords', 'centre_coords', 'distfromcentre', 'ct', 'bx', 'fhx',\n", | |
| " 'amiodarone', 'flecainade', 'nitrofurantoin', 'azathioprine',\n", | |
| " 'gefitinib', 'ifosfamide', 'melphalan', 'rituximab', 'mrc0', 'mrc1',\n", | |
| " 'mrc2', 'mrc3', 'mrc4', 'pc_sob', 'pc_cough', 'pc_incidental',\n", | |
| " 'pc_incidental_desc', 'pc_other', 'comments', 'mrc_score',\n", | |
| " 'peto_exposed', 'exposed_stone', 'exposed_wood', 'exposed_metal',\n", | |
| " 'exposed_farm', 'exposed_asbestos', 'peto_dose', 'median_ssec',\n", | |
| " 'fibre_ml_exposure', 'lowest_peto_cat', 'peto_shortlist',\n", | |
| " 'coggan_shortlist', 'mean_pmr', 'highest_pmr', 'meso_pmr_dose',\n", | |
| " 'agecat', 'ethcat', 'ctcat', 'bxcat', 'centre_name',\n", | |
| " 'Aberdeen Royal Infirmary',\n", | |
| " 'Aintree University Hospitals NHS Foundation Trust',\n", | |
| " 'Glasgow Royal Infirmary', 'Guys’ and St Thomas’ NHS Foundation Trust',\n", | |
| " 'Heart of England NHS Foundation Trust',\n", | |
| " 'Imperial College Healthcare NHS Trust',\n", | |
| " 'Leeds Teaching Hospitals NHS Trust', 'Morriston Hospital',\n", | |
| " 'Norfolk and Norwich University Hospitals NHS Foundation Trust',\n", | |
| " 'North Bristol NHS Trust', 'Nottingham University Hospitals NHS Trust',\n", | |
| " 'Papworth Hospital NHS Foundation Trust',\n", | |
| " 'Portsmouth Hospitals NHS Trust',\n", | |
| " 'Royal Devon and Exeter NHS Foundation Trust',\n", | |
| " 'Royal Infirmary of Edinburgh',\n", | |
| " 'Southampton University Hospitals NHS Trust',\n", | |
| " 'Taunton and Somerset NHS Foundation Trust',\n", | |
| " 'The Newcastle Upon Tyne Hospitals NHS Foundation Trust',\n", | |
| " 'University Hospital of South Manchester',\n", | |
| " 'University Hospitals Birmingham NHS Foundation Trust',\n", | |
| " 'Worcestershire Acute Hospitals NHS Trust', 'Arab',\n", | |
| " 'Asian / Asian British', 'Black / African/ Caribbean/ Black British',\n", | |
| " 'Mixed / Multiple ethnic groups', 'Other ethnic group', 'White',\n", | |
| " 'definite UIP', 'no CT', 'other', 'possible UIP', 'ever_drug_exposed',\n", | |
| " 'lowest_peto_cat_reordered', 'median_ssec_int', 'genotype',\n", | |
| " 'minor_allele'],\n", | |
| " dtype='object')" | |
| ] | |
| }, | |
| "execution_count": 47, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.columns" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 48, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df[['ipfjes_id','case','minor_allele','genotype', 'peto_exposed','peto_dose','lowest_peto_cat_reordered', 'fibre_ml_exposure','ever_smoked', 'packyrs','age', 'ethnicity', 'ethcat', 'centre']].to_csv('cosseta.csv', index=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# sensitivity analysis " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 49, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "df = df[df['centre'] != 2] # throw away centre 2 since it contains no genotyped cases" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 50, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.623254\n", | |
| " Iterations 7\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 808\n", | |
| "Model: Logit Df Residuals: 785\n", | |
| "Method: MLE Df Model: 22\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.1005\n", | |
| "Time: 18:50:07 Log-Likelihood: -503.59\n", | |
| "converged: True LL-Null: -559.86\n", | |
| "Covariance Type: nonrobust LLR p-value: 3.864e-14\n", | |
| "============================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "--------------------------------------------------------------------------------------------\n", | |
| "Intercept -0.1533 0.320 -0.478 0.632 -0.781 0.475\n", | |
| "C(centre)[T.3.0] -1.1856 0.449 -2.642 0.008 -2.065 -0.306\n", | |
| "C(centre)[T.4.0] -0.0720 0.310 -0.232 0.816 -0.680 0.536\n", | |
| "C(centre)[T.5.0] 0.2419 0.340 0.711 0.477 -0.425 0.909\n", | |
| "C(centre)[T.6.0] 0.0016 0.342 0.005 0.996 -0.668 0.671\n", | |
| "C(centre)[T.7.0] -0.1405 0.429 -0.327 0.744 -0.982 0.701\n", | |
| "C(centre)[T.8.0] -0.0454 0.329 -0.138 0.890 -0.691 0.600\n", | |
| "C(centre)[T.9.0] -1.4948 0.512 -2.922 0.003 -2.498 -0.492\n", | |
| "C(centre)[T.10.0] -1.2839 0.350 -3.664 0.000 -1.971 -0.597\n", | |
| "C(centre)[T.11.0] -0.0920 0.489 -0.188 0.851 -1.050 0.866\n", | |
| "C(centre)[T.12.0] 0.2417 0.803 0.301 0.763 -1.332 1.816\n", | |
| "C(centre)[T.13.0] -0.3763 0.500 -0.753 0.452 -1.356 0.604\n", | |
| "C(centre)[T.14.0] 0.1482 0.418 0.354 0.723 -0.671 0.968\n", | |
| "C(centre)[T.15.0] 0.0146 0.440 0.033 0.974 -0.847 0.876\n", | |
| "C(centre)[T.16.0] 3.4126 1.042 3.275 0.001 1.370 5.455\n", | |
| "C(centre)[T.17.0] -0.2516 0.651 -0.386 0.699 -1.528 1.025\n", | |
| "C(centre)[T.18.0] 2.5925 1.066 2.432 0.015 0.503 4.682\n", | |
| "C(centre)[T.19.0] -0.2381 0.569 -0.418 0.676 -1.354 0.878\n", | |
| "C(centre)[T.20.0] 0.0497 0.364 0.136 0.891 -0.664 0.764\n", | |
| "C(centre)[T.21.0] 1.6392 1.108 1.480 0.139 -0.532 3.810\n", | |
| "ever_smoked 0.1785 0.277 0.645 0.519 -0.364 0.721\n", | |
| "peto_exposed -0.0675 0.297 -0.227 0.820 -0.649 0.514\n", | |
| "ever_smoked:peto_exposed 0.2676 0.354 0.756 0.450 -0.427 0.962\n", | |
| "============================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.46 1.61 0.86\n", | |
| "C(centre)[T.3.0] 0.13 0.74 0.31\n", | |
| "C(centre)[T.4.0] 0.51 1.71 0.93\n", | |
| "C(centre)[T.5.0] 0.65 2.48 1.27\n", | |
| "C(centre)[T.6.0] 0.51 1.96 1.00\n", | |
| "C(centre)[T.7.0] 0.37 2.02 0.87\n", | |
| "C(centre)[T.8.0] 0.50 1.82 0.96\n", | |
| "C(centre)[T.9.0] 0.08 0.61 0.22\n", | |
| "C(centre)[T.10.0] 0.14 0.55 0.28\n", | |
| "C(centre)[T.11.0] 0.35 2.38 0.91\n", | |
| "C(centre)[T.12.0] 0.26 6.15 1.27\n", | |
| "C(centre)[T.13.0] 0.26 1.83 0.69\n", | |
| "C(centre)[T.14.0] 0.51 2.63 1.16\n", | |
| "C(centre)[T.15.0] 0.43 2.40 1.01\n", | |
| "C(centre)[T.16.0] 3.94 233.96 30.34\n", | |
| "C(centre)[T.17.0] 0.22 2.79 0.78\n", | |
| "C(centre)[T.18.0] 1.65 107.98 13.36\n", | |
| "C(centre)[T.19.0] 0.26 2.41 0.79\n", | |
| "C(centre)[T.20.0] 0.51 2.15 1.05\n", | |
| "C(centre)[T.21.0] 0.59 45.15 5.15\n", | |
| "ever_smoked 0.70 2.06 1.20\n", | |
| "peto_exposed 0.52 1.67 0.93\n", | |
| "ever_smoked:peto_exposed 0.65 2.62 1.31\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ C(centre) + ever_smoked*peto_exposed', data=df) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 51, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Warning: Maximum number of iterations has been exceeded.\n", | |
| " Current function value: 0.552271\n", | |
| " Iterations: 35\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 470\n", | |
| "Model: Logit Df Residuals: 447\n", | |
| "Method: MLE Df Model: 22\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.1226\n", | |
| "Time: 18:50:07 Log-Likelihood: -259.57\n", | |
| "converged: False LL-Null: -295.82\n", | |
| "Covariance Type: nonrobust LLR p-value: 2.649e-07\n", | |
| "============================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "--------------------------------------------------------------------------------------------\n", | |
| "Intercept -0.5072 0.450 -1.128 0.259 -1.388 0.374\n", | |
| "C(centre)[T.3.0] -3.0007 1.071 -2.801 0.005 -5.100 -0.901\n", | |
| "C(centre)[T.4.0] -0.4225 0.429 -0.985 0.325 -1.263 0.418\n", | |
| "C(centre)[T.5.0] -0.3755 0.469 -0.800 0.424 -1.296 0.545\n", | |
| "C(centre)[T.6.0] -0.8764 0.482 -1.816 0.069 -1.822 0.069\n", | |
| "C(centre)[T.7.0] -0.5951 0.585 -1.018 0.309 -1.741 0.551\n", | |
| "C(centre)[T.8.0] -0.3221 0.444 -0.725 0.468 -1.193 0.548\n", | |
| "C(centre)[T.9.0] -1.5425 0.700 -2.205 0.027 -2.914 -0.171\n", | |
| "C(centre)[T.10.0] -1.5149 0.464 -3.266 0.001 -2.424 -0.606\n", | |
| "C(centre)[T.11.0] -0.2154 0.653 -0.330 0.741 -1.495 1.064\n", | |
| "C(centre)[T.12.0] 0.1198 1.051 0.114 0.909 -1.941 2.180\n", | |
| "C(centre)[T.13.0] -0.3851 0.640 -0.602 0.547 -1.639 0.869\n", | |
| "C(centre)[T.14.0] -0.0364 0.593 -0.061 0.951 -1.199 1.126\n", | |
| "C(centre)[T.15.0] -0.6553 0.675 -0.971 0.331 -1.978 0.667\n", | |
| "C(centre)[T.16.0] 2.9878 1.078 2.771 0.006 0.875 5.101\n", | |
| "C(centre)[T.17.0] -0.9279 0.875 -1.061 0.289 -2.642 0.786\n", | |
| "C(centre)[T.18.0] 25.6231 1.93e+05 0.000 1.000 -3.79e+05 3.79e+05\n", | |
| "C(centre)[T.19.0] -1.7448 1.114 -1.566 0.117 -3.929 0.439\n", | |
| "C(centre)[T.20.0] -0.5751 0.514 -1.118 0.264 -1.584 0.433\n", | |
| "C(centre)[T.21.0] 0.8732 1.266 0.690 0.490 -1.607 3.354\n", | |
| "ever_smoked 0.4247 0.403 1.053 0.292 -0.366 1.215\n", | |
| "peto_exposed 0.4169 0.417 1.000 0.317 -0.400 1.234\n", | |
| "ever_smoked:peto_exposed -0.5584 0.502 -1.113 0.266 -1.542 0.425\n", | |
| "============================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.25 1.45 0.60\n", | |
| "C(centre)[T.3.0] 0.01 0.41 0.05\n", | |
| "C(centre)[T.4.0] 0.28 1.52 0.66\n", | |
| "C(centre)[T.5.0] 0.27 1.72 0.69\n", | |
| "C(centre)[T.6.0] 0.16 1.07 0.42\n", | |
| "C(centre)[T.7.0] 0.18 1.74 0.55\n", | |
| "C(centre)[T.8.0] 0.30 1.73 0.72\n", | |
| "C(centre)[T.9.0] 0.05 0.84 0.21\n", | |
| "C(centre)[T.10.0] 0.09 0.55 0.22\n", | |
| "C(centre)[T.11.0] 0.22 2.90 0.81\n", | |
| "C(centre)[T.12.0] 0.14 8.85 1.13\n", | |
| "C(centre)[T.13.0] 0.19 2.38 0.68\n", | |
| "C(centre)[T.14.0] 0.30 3.08 0.96\n", | |
| "C(centre)[T.15.0] 0.14 1.95 0.52\n", | |
| "C(centre)[T.16.0] 2.40 164.15 19.84\n", | |
| "C(centre)[T.17.0] 0.07 2.19 0.40\n", | |
| "C(centre)[T.18.0] 0.00 inf 134,265,754,158.55\n", | |
| "C(centre)[T.19.0] 0.02 1.55 0.17\n", | |
| "C(centre)[T.20.0] 0.21 1.54 0.56\n", | |
| "C(centre)[T.21.0] 0.20 28.61 2.39\n", | |
| "ever_smoked 0.69 3.37 1.53\n", | |
| "peto_exposed 0.67 3.43 1.52\n", | |
| "ever_smoked:peto_exposed 0.21 1.53 0.57\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/home/drcjar/anaconda3/envs/ipfjes/lib/python3.5/site-packages/statsmodels/base/model.py:568: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n", | |
| " \"Check mle_retvals\", ConvergenceWarning)\n", | |
| "/home/drcjar/anaconda3/envs/ipfjes/lib/python3.5/site-packages/ipykernel_launcher.py:10: RuntimeWarning: overflow encountered in exp\n", | |
| " # Remove the CWD from sys.path while we load stuff.\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ C(centre) + ever_smoked*peto_exposed', data=df[df['genotype'] == 0]) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 52, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "{'warnflag': 1, 'converged': False, 'fopt': 0.5522708783537755, 'iterations': 35, 'Hessian': array([[-1.87391996e-01, -2.03838319e-03, -2.51523230e-02,\n", | |
| " -1.75842094e-02, -1.57700251e-02, -8.71592841e-03,\n", | |
| " -2.13369199e-02, -5.41636587e-03, -1.80127868e-02,\n", | |
| " -6.51389565e-03, -2.12573330e-03, -6.82256037e-03,\n", | |
| " -8.34569887e-03, -6.04801705e-03, -2.00045465e-03,\n", | |
| " -3.18873754e-03, -5.68561746e-14, -1.85863557e-03,\n", | |
| " -1.29700506e-02, -1.41720838e-03, -1.34309334e-01,\n", | |
| " -1.24716615e-01, -9.23409703e-02],\n", | |
| " [-2.03838319e-03, -2.03838319e-03, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -1.09343646e-03,\n", | |
| " -1.35444644e-03, -4.69589213e-04],\n", | |
| " [-2.51523230e-02, -0.00000000e+00, -2.51523230e-02,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -1.50543249e-02,\n", | |
| " -1.50455575e-02, -1.05597900e-02],\n", | |
| " [-1.75842094e-02, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -1.75842094e-02, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -1.04050687e-02,\n", | |
| " -9.89202132e-03, -5.35536179e-03],\n", | |
| " [-1.57700251e-02, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -1.57700251e-02, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -1.06672985e-02,\n", | |
| " -9.37671814e-03, -5.97893227e-03],\n", | |
| " [-8.71592841e-03, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -8.71592841e-03,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -7.36965281e-03,\n", | |
| " -5.46619728e-03, -4.51813184e-03],\n", | |
| " [-2.13369199e-02, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -2.13369199e-02, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -1.68674526e-02,\n", | |
| " -1.83939952e-02, -1.48245412e-02],\n", | |
| " [-5.41636587e-03, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -5.41636587e-03, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -3.82255379e-03,\n", | |
| " -3.81640773e-03, -2.65269556e-03],\n", | |
| " [-1.80127868e-02, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -1.80127868e-02,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -1.28528356e-02,\n", | |
| " -1.13479195e-02, -8.38450647e-03],\n", | |
| " [-6.51389565e-03, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -6.51389565e-03, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -4.58990461e-03,\n", | |
| " -4.06900699e-03, -3.54933022e-03],\n", | |
| " [-2.12573330e-03, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -2.12573330e-03, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -1.59393390e-03,\n", | |
| " -1.06227367e-03, -5.30474271e-04],\n", | |
| " [-6.82256037e-03, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -6.82256037e-03,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -5.88094914e-03,\n", | |
| " -4.87229639e-03, -4.36934009e-03],\n", | |
| " [-8.34569887e-03, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -8.34569887e-03, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -6.29715782e-03,\n", | |
| " -5.76660723e-03, -4.70703381e-03],\n", | |
| " [-6.04801705e-03, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -6.04801705e-03, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -2.64624129e-03,\n", | |
| " -4.50358625e-03, -2.64624129e-03],\n", | |
| " [-2.00045465e-03, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -2.00045465e-03,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -1.59178058e-03,\n", | |
| " -1.17380115e-03, -1.06838426e-03],\n", | |
| " [-3.18873754e-03, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -3.18873754e-03, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -2.77393858e-03,\n", | |
| " -2.35609273e-03, -1.94129377e-03],\n", | |
| " [-5.68561746e-14, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -5.68561746e-14, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -5.68561746e-14,\n", | |
| " -3.96477177e-14, -3.96477177e-14],\n", | |
| " [-1.85863557e-03, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -1.85863557e-03,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -1.42288288e-03,\n", | |
| " -1.16748592e-03, -9.14962486e-04],\n", | |
| " [-1.29700506e-02, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -1.29700506e-02, -0.00000000e+00, -1.20907483e-02,\n", | |
| " -7.30640273e-03, -6.82928294e-03],\n", | |
| " [-1.41720838e-03, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -0.00000000e+00, -0.00000000e+00,\n", | |
| " -0.00000000e+00, -1.41720838e-03, -9.59146699e-04,\n", | |
| " -1.41720838e-03, -9.59146699e-04],\n", | |
| " [-1.34309334e-01, -1.09343646e-03, -1.50543249e-02,\n", | |
| " -1.04050687e-02, -1.06672985e-02, -7.36965281e-03,\n", | |
| " -1.68674526e-02, -3.82255379e-03, -1.28528356e-02,\n", | |
| " -4.58990461e-03, -1.59393390e-03, -5.88094914e-03,\n", | |
| " -6.29715782e-03, -2.64624129e-03, -1.59178058e-03,\n", | |
| " -2.77393858e-03, -5.68561746e-14, -1.42288288e-03,\n", | |
| " -1.20907483e-02, -9.59146699e-04, -1.34309334e-01,\n", | |
| " -9.23409703e-02, -9.23409703e-02],\n", | |
| " [-1.24716615e-01, -1.35444644e-03, -1.50455575e-02,\n", | |
| " -9.89202132e-03, -9.37671814e-03, -5.46619728e-03,\n", | |
| " -1.83939952e-02, -3.81640773e-03, -1.13479195e-02,\n", | |
| " -4.06900699e-03, -1.06227367e-03, -4.87229639e-03,\n", | |
| " -5.76660723e-03, -4.50358625e-03, -1.17380115e-03,\n", | |
| " -2.35609273e-03, -3.96477177e-14, -1.16748592e-03,\n", | |
| " -7.30640273e-03, -1.41720838e-03, -9.23409703e-02,\n", | |
| " -1.24716615e-01, -9.23409703e-02],\n", | |
| " [-9.23409703e-02, -4.69589213e-04, -1.05597900e-02,\n", | |
| " -5.35536179e-03, -5.97893227e-03, -4.51813184e-03,\n", | |
| " -1.48245412e-02, -2.65269556e-03, -8.38450647e-03,\n", | |
| " -3.54933022e-03, -5.30474271e-04, -4.36934009e-03,\n", | |
| " -4.70703381e-03, -2.64624129e-03, -1.06838426e-03,\n", | |
| " -1.94129377e-03, -3.96477177e-14, -9.14962486e-04,\n", | |
| " -6.82928294e-03, -9.59146699e-04, -9.23409703e-02,\n", | |
| " -9.23409703e-02, -9.23409703e-02]]), 'score': array([ 1.70076719e-17, 4.72435330e-19, 0.00000000e+00, 2.71650315e-18,\n", | |
| " -1.88974132e-18, 1.18108832e-18, 2.36217665e-18, 9.44870659e-19,\n", | |
| " 1.41730599e-18, 0.00000000e+00, -2.36217665e-19, 1.41730599e-18,\n", | |
| " 1.88974132e-18, 8.26761827e-19, 4.96057096e-18, 5.90544162e-19,\n", | |
| " 5.68561746e-14, 0.00000000e+00, -9.44870659e-19, 0.00000000e+00,\n", | |
| " 9.44870659e-18, 1.70076719e-17, 8.50383593e-18])}\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(result.mle_retvals)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 53, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Warning: Maximum number of iterations has been exceeded.\n", | |
| " Current function value: 0.497555\n", | |
| " Iterations: 35\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 338\n", | |
| "Model: Logit Df Residuals: 314\n", | |
| "Method: MLE Df Model: 23\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.1623\n", | |
| "Time: 18:50:07 Log-Likelihood: -168.17\n", | |
| "converged: False LL-Null: -200.76\n", | |
| "Covariance Type: nonrobust LLR p-value: 6.619e-06\n", | |
| "============================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "--------------------------------------------------------------------------------------------\n", | |
| "Intercept -3.4449 1.476 -2.335 0.020 -6.337 -0.553\n", | |
| "C(centre)[T.3.0] 0.0209 0.677 0.031 0.975 -1.307 1.349\n", | |
| "C(centre)[T.4.0] 0.3200 0.501 0.639 0.523 -0.661 1.301\n", | |
| "C(centre)[T.5.0] 1.2380 0.605 2.045 0.041 0.051 2.425\n", | |
| "C(centre)[T.6.0] 1.8960 0.733 2.587 0.010 0.459 3.333\n", | |
| "C(centre)[T.7.0] 0.9918 0.812 1.222 0.222 -0.599 2.582\n", | |
| "C(centre)[T.8.0] 0.6448 0.557 1.158 0.247 -0.447 1.736\n", | |
| "C(centre)[T.9.0] -1.2746 0.784 -1.626 0.104 -2.811 0.262\n", | |
| "C(centre)[T.10.0] -0.0284 0.635 -0.045 0.964 -1.273 1.217\n", | |
| "C(centre)[T.11.0] 0.6130 0.854 0.718 0.473 -1.060 2.286\n", | |
| "C(centre)[T.12.0] 0.6041 1.343 0.450 0.653 -2.027 3.236\n", | |
| "C(centre)[T.13.0] 0.2125 0.857 0.248 0.804 -1.467 1.892\n", | |
| "C(centre)[T.14.0] 0.2247 0.635 0.354 0.723 -1.020 1.469\n", | |
| "C(centre)[T.15.0] 0.7131 0.707 1.009 0.313 -0.672 2.098\n", | |
| "C(centre)[T.16.0] 23.8352 4.3e+04 0.001 1.000 -8.43e+04 8.43e+04\n", | |
| "C(centre)[T.17.0] 20.7693 1.91e+04 0.001 0.999 -3.74e+04 3.75e+04\n", | |
| "C(centre)[T.18.0] 2.5712 1.152 2.232 0.026 0.313 4.829\n", | |
| "C(centre)[T.19.0] 1.3860 1.173 1.182 0.237 -0.912 3.684\n", | |
| "C(centre)[T.20.0] 1.0472 0.638 1.642 0.101 -0.203 2.297\n", | |
| "C(centre)[T.21.0] 21.7733 3.4e+04 0.001 0.999 -6.65e+04 6.66e+04\n", | |
| "age 0.0480 0.017 2.785 0.005 0.014 0.082\n", | |
| "ever_smoked -0.2134 0.496 -0.431 0.667 -1.185 0.758\n", | |
| "peto_exposed -0.7364 0.537 -1.371 0.170 -1.789 0.316\n", | |
| "ever_smoked:peto_exposed 1.5718 0.636 2.472 0.013 0.326 2.818\n", | |
| "============================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.00 0.58 0.03\n", | |
| "C(centre)[T.3.0] 0.27 3.85 1.02\n", | |
| "C(centre)[T.4.0] 0.52 3.67 1.38\n", | |
| "C(centre)[T.5.0] 1.05 11.30 3.45\n", | |
| "C(centre)[T.6.0] 1.58 28.01 6.66\n", | |
| "C(centre)[T.7.0] 0.55 13.23 2.70\n", | |
| "C(centre)[T.8.0] 0.64 5.68 1.91\n", | |
| "C(centre)[T.9.0] 0.06 1.30 0.28\n", | |
| "C(centre)[T.10.0] 0.28 3.38 0.97\n", | |
| "C(centre)[T.11.0] 0.35 9.84 1.85\n", | |
| "C(centre)[T.12.0] 0.13 25.42 1.83\n", | |
| "C(centre)[T.13.0] 0.23 6.63 1.24\n", | |
| "C(centre)[T.14.0] 0.36 4.35 1.25\n", | |
| "C(centre)[T.15.0] 0.51 8.15 2.04\n", | |
| "C(centre)[T.16.0] 0.00 inf 22,463,517,373.90\n", | |
| "C(centre)[T.17.0] 0.00 inf 1,047,068,906.30\n", | |
| "C(centre)[T.18.0] 1.37 125.10 13.08\n", | |
| "C(centre)[T.19.0] 0.40 39.81 4.00\n", | |
| "C(centre)[T.20.0] 0.82 9.95 2.85\n", | |
| "C(centre)[T.21.0] 0.00 inf 2,857,809,871.09\n", | |
| "age 1.01 1.09 1.05\n", | |
| "ever_smoked 0.31 2.13 0.81\n", | |
| "peto_exposed 0.17 1.37 0.48\n", | |
| "ever_smoked:peto_exposed 1.39 16.74 4.82\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/home/drcjar/anaconda3/envs/ipfjes/lib/python3.5/site-packages/statsmodels/base/model.py:568: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n", | |
| " \"Check mle_retvals\", ConvergenceWarning)\n", | |
| "/home/drcjar/anaconda3/envs/ipfjes/lib/python3.5/site-packages/ipykernel_launcher.py:10: RuntimeWarning: overflow encountered in exp\n", | |
| " # Remove the CWD from sys.path while we load stuff.\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ age + C(centre) + ever_smoked*peto_exposed', data=df[df['genotype'] > 0]) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 54, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.508455\n", | |
| " Iterations 7\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 338\n", | |
| "Model: Logit Df Residuals: 332\n", | |
| "Method: MLE Df Model: 5\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.1440\n", | |
| "Time: 18:50:07 Log-Likelihood: -171.86\n", | |
| "converged: True LL-Null: -200.76\n", | |
| "Covariance Type: nonrobust LLR p-value: 3.460e-11\n", | |
| "============================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "--------------------------------------------------------------------------------------------\n", | |
| "Intercept -3.4483 1.326 -2.601 0.009 -6.047 -0.850\n", | |
| "age 0.0450 0.016 2.780 0.005 0.013 0.077\n", | |
| "distfromcentre 0.0422 0.009 4.702 0.000 0.025 0.060\n", | |
| "ever_smoked -0.1933 0.472 -0.409 0.682 -1.118 0.732\n", | |
| "peto_exposed -0.3531 0.505 -0.700 0.484 -1.342 0.636\n", | |
| "ever_smoked:peto_exposed 1.1723 0.598 1.961 0.050 0.001 2.344\n", | |
| "============================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.00 0.43 0.03\n", | |
| "age 1.01 1.08 1.05\n", | |
| "distfromcentre 1.02 1.06 1.04\n", | |
| "ever_smoked 0.33 2.08 0.82\n", | |
| "peto_exposed 0.26 1.89 0.70\n", | |
| "ever_smoked:peto_exposed 1.00 10.42 3.23\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ age + distfromcentre + ever_smoked*peto_exposed', data=df[df['genotype'] > 0]) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 55, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Index(['ipfjes_id', 'case', 'dob', 'age', 'yob', 'agegroup', 'ethnicity',\n", | |
| " 'ever_smoked', 'current_smoker', 'packyrs', 'participant_id', 'centre',\n", | |
| " 'gp_coords', 'centre_coords', 'distfromcentre', 'ct', 'bx', 'fhx',\n", | |
| " 'amiodarone', 'flecainade', 'nitrofurantoin', 'azathioprine',\n", | |
| " 'gefitinib', 'ifosfamide', 'melphalan', 'rituximab', 'mrc0', 'mrc1',\n", | |
| " 'mrc2', 'mrc3', 'mrc4', 'pc_sob', 'pc_cough', 'pc_incidental',\n", | |
| " 'pc_incidental_desc', 'pc_other', 'comments', 'mrc_score',\n", | |
| " 'peto_exposed', 'exposed_stone', 'exposed_wood', 'exposed_metal',\n", | |
| " 'exposed_farm', 'exposed_asbestos', 'peto_dose', 'median_ssec',\n", | |
| " 'fibre_ml_exposure', 'lowest_peto_cat', 'peto_shortlist',\n", | |
| " 'coggan_shortlist', 'mean_pmr', 'highest_pmr', 'meso_pmr_dose',\n", | |
| " 'agecat', 'ethcat', 'ctcat', 'bxcat', 'centre_name',\n", | |
| " 'Aberdeen Royal Infirmary',\n", | |
| " 'Aintree University Hospitals NHS Foundation Trust',\n", | |
| " 'Glasgow Royal Infirmary', 'Guys’ and St Thomas’ NHS Foundation Trust',\n", | |
| " 'Heart of England NHS Foundation Trust',\n", | |
| " 'Imperial College Healthcare NHS Trust',\n", | |
| " 'Leeds Teaching Hospitals NHS Trust', 'Morriston Hospital',\n", | |
| " 'Norfolk and Norwich University Hospitals NHS Foundation Trust',\n", | |
| " 'North Bristol NHS Trust', 'Nottingham University Hospitals NHS Trust',\n", | |
| " 'Papworth Hospital NHS Foundation Trust',\n", | |
| " 'Portsmouth Hospitals NHS Trust',\n", | |
| " 'Royal Devon and Exeter NHS Foundation Trust',\n", | |
| " 'Royal Infirmary of Edinburgh',\n", | |
| " 'Southampton University Hospitals NHS Trust',\n", | |
| " 'Taunton and Somerset NHS Foundation Trust',\n", | |
| " 'The Newcastle Upon Tyne Hospitals NHS Foundation Trust',\n", | |
| " 'University Hospital of South Manchester',\n", | |
| " 'University Hospitals Birmingham NHS Foundation Trust',\n", | |
| " 'Worcestershire Acute Hospitals NHS Trust', 'Arab',\n", | |
| " 'Asian / Asian British', 'Black / African/ Caribbean/ Black British',\n", | |
| " 'Mixed / Multiple ethnic groups', 'Other ethnic group', 'White',\n", | |
| " 'definite UIP', 'no CT', 'other', 'possible UIP', 'ever_drug_exposed',\n", | |
| " 'lowest_peto_cat_reordered', 'median_ssec_int', 'genotype',\n", | |
| " 'minor_allele'],\n", | |
| " dtype='object')" | |
| ] | |
| }, | |
| "execution_count": 55, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.columns" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 56, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.555380\n", | |
| " Iterations 6\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 338\n", | |
| "Model: Logit Df Residuals: 327\n", | |
| "Method: MLE Df Model: 10\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.06495\n", | |
| "Time: 18:50:08 Log-Likelihood: -187.72\n", | |
| "converged: True LL-Null: -200.76\n", | |
| "Covariance Type: nonrobust LLR p-value: 0.003637\n", | |
| "=================================================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "-----------------------------------------------------------------------------------------------------------------\n", | |
| "Intercept -2.5496 1.348 -1.891 0.059 -5.193 0.093\n", | |
| "C(lowest_peto_cat_reordered)[T.2] 0.2687 0.750 0.358 0.720 -1.202 1.739\n", | |
| "C(lowest_peto_cat_reordered)[T.3] -0.5650 0.674 -0.839 0.402 -1.885 0.755\n", | |
| "C(lowest_peto_cat_reordered)[T.4] 0.0064 0.767 0.008 0.993 -1.497 1.510\n", | |
| "C(lowest_peto_cat_reordered)[T.5] -1.3882 0.946 -1.467 0.142 -3.243 0.467\n", | |
| "age 0.0445 0.016 2.810 0.005 0.013 0.076\n", | |
| "ever_smoked -0.1762 0.656 -0.269 0.788 -1.461 1.109\n", | |
| "ever_smoked:C(lowest_peto_cat_reordered)[T.2] -0.4357 0.892 -0.489 0.625 -2.183 1.312\n", | |
| "ever_smoked:C(lowest_peto_cat_reordered)[T.3] 1.3211 0.832 1.588 0.112 -0.310 2.952\n", | |
| "ever_smoked:C(lowest_peto_cat_reordered)[T.4] 1.1288 0.915 1.234 0.217 -0.665 2.922\n", | |
| "ever_smoked:C(lowest_peto_cat_reordered)[T.5] 1.6952 1.082 1.567 0.117 -0.425 3.815\n", | |
| "=================================================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.01 1.10 0.08\n", | |
| "C(lowest_peto_cat_reordered)[T.2] 0.30 5.69 1.31\n", | |
| "C(lowest_peto_cat_reordered)[T.3] 0.15 2.13 0.57\n", | |
| "C(lowest_peto_cat_reordered)[T.4] 0.22 4.53 1.01\n", | |
| "C(lowest_peto_cat_reordered)[T.5] 0.04 1.59 0.25\n", | |
| "age 1.01 1.08 1.05\n", | |
| "ever_smoked 0.23 3.03 0.84\n", | |
| "ever_smoked:C(lowest_peto_cat_reordered)[T.2] 0.11 3.71 0.65\n", | |
| "ever_smoked:C(lowest_peto_cat_reordered)[T.3] 0.73 19.14 3.75\n", | |
| "ever_smoked:C(lowest_peto_cat_reordered)[T.4] 0.51 18.58 3.09\n", | |
| "ever_smoked:C(lowest_peto_cat_reordered)[T.5] 0.65 45.39 5.45\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ age + ever_smoked*C(lowest_peto_cat_reordered)', data=df[df['genotype'] > 0]) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 57, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Index(['ipfjes_id', 'case', 'dob', 'age', 'yob', 'agegroup', 'ethnicity',\n", | |
| " 'ever_smoked', 'current_smoker', 'packyrs', 'participant_id', 'centre',\n", | |
| " 'gp_coords', 'centre_coords', 'distfromcentre', 'ct', 'bx', 'fhx',\n", | |
| " 'amiodarone', 'flecainade', 'nitrofurantoin', 'azathioprine',\n", | |
| " 'gefitinib', 'ifosfamide', 'melphalan', 'rituximab', 'mrc0', 'mrc1',\n", | |
| " 'mrc2', 'mrc3', 'mrc4', 'pc_sob', 'pc_cough', 'pc_incidental',\n", | |
| " 'pc_incidental_desc', 'pc_other', 'comments', 'mrc_score',\n", | |
| " 'peto_exposed', 'exposed_stone', 'exposed_wood', 'exposed_metal',\n", | |
| " 'exposed_farm', 'exposed_asbestos', 'peto_dose', 'median_ssec',\n", | |
| " 'fibre_ml_exposure', 'lowest_peto_cat', 'peto_shortlist',\n", | |
| " 'coggan_shortlist', 'mean_pmr', 'highest_pmr', 'meso_pmr_dose',\n", | |
| " 'agecat', 'ethcat', 'ctcat', 'bxcat', 'centre_name',\n", | |
| " 'Aberdeen Royal Infirmary',\n", | |
| " 'Aintree University Hospitals NHS Foundation Trust',\n", | |
| " 'Glasgow Royal Infirmary', 'Guys’ and St Thomas’ NHS Foundation Trust',\n", | |
| " 'Heart of England NHS Foundation Trust',\n", | |
| " 'Imperial College Healthcare NHS Trust',\n", | |
| " 'Leeds Teaching Hospitals NHS Trust', 'Morriston Hospital',\n", | |
| " 'Norfolk and Norwich University Hospitals NHS Foundation Trust',\n", | |
| " 'North Bristol NHS Trust', 'Nottingham University Hospitals NHS Trust',\n", | |
| " 'Papworth Hospital NHS Foundation Trust',\n", | |
| " 'Portsmouth Hospitals NHS Trust',\n", | |
| " 'Royal Devon and Exeter NHS Foundation Trust',\n", | |
| " 'Royal Infirmary of Edinburgh',\n", | |
| " 'Southampton University Hospitals NHS Trust',\n", | |
| " 'Taunton and Somerset NHS Foundation Trust',\n", | |
| " 'The Newcastle Upon Tyne Hospitals NHS Foundation Trust',\n", | |
| " 'University Hospital of South Manchester',\n", | |
| " 'University Hospitals Birmingham NHS Foundation Trust',\n", | |
| " 'Worcestershire Acute Hospitals NHS Trust', 'Arab',\n", | |
| " 'Asian / Asian British', 'Black / African/ Caribbean/ Black British',\n", | |
| " 'Mixed / Multiple ethnic groups', 'Other ethnic group', 'White',\n", | |
| " 'definite UIP', 'no CT', 'other', 'possible UIP', 'ever_drug_exposed',\n", | |
| " 'lowest_peto_cat_reordered', 'median_ssec_int', 'genotype',\n", | |
| " 'minor_allele'],\n", | |
| " dtype='object')" | |
| ] | |
| }, | |
| "execution_count": 57, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.columns" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 58, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.572279\n", | |
| " Iterations 5\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 302\n", | |
| "Model: Logit Df Residuals: 297\n", | |
| "Method: MLE Df Model: 4\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.06056\n", | |
| "Time: 18:50:08 Log-Likelihood: -172.83\n", | |
| "converged: True LL-Null: -183.97\n", | |
| "Covariance Type: nonrobust LLR p-value: 0.0001760\n", | |
| "============================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "--------------------------------------------------------------------------------------------\n", | |
| "Intercept -2.5108 1.308 -1.919 0.055 -5.075 0.053\n", | |
| "age 0.0465 0.016 2.835 0.005 0.014 0.079\n", | |
| "ever_smoked -0.5233 0.462 -1.132 0.258 -1.430 0.383\n", | |
| "peto_exposed -0.8041 0.499 -1.613 0.107 -1.781 0.173\n", | |
| "ever_smoked:peto_exposed 1.6815 0.590 2.850 0.004 0.525 2.838\n", | |
| "============================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.01 1.05 0.08\n", | |
| "age 1.01 1.08 1.05\n", | |
| "ever_smoked 0.24 1.47 0.59\n", | |
| "peto_exposed 0.17 1.19 0.45\n", | |
| "ever_smoked:peto_exposed 1.69 17.08 5.37\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ age + ever_smoked*peto_exposed', data=df[df['genotype'] == 1]) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 59, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Warning: Maximum number of iterations has been exceeded.\n", | |
| " Current function value: 0.371688\n", | |
| " Iterations: 35\n", | |
| " Logit Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: case No. Observations: 36\n", | |
| "Model: Logit Df Residuals: 31\n", | |
| "Method: MLE Df Model: 4\n", | |
| "Date: Fri, 04 Sep 2020 Pseudo R-squ.: 0.07756\n", | |
| "Time: 18:50:08 Log-Likelihood: -13.381\n", | |
| "converged: False LL-Null: -14.506\n", | |
| "Covariance Type: nonrobust LLR p-value: 0.6898\n", | |
| "============================================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "--------------------------------------------------------------------------------------------\n", | |
| "Intercept 4.2880 5.075 0.845 0.398 -5.658 14.234\n", | |
| "age -0.0439 0.060 -0.735 0.462 -0.161 0.073\n", | |
| "ever_smoked 0.7230 1.684 0.429 0.668 -2.578 4.024\n", | |
| "peto_exposed 22.6988 7.6e+04 0.000 1.000 -1.49e+05 1.49e+05\n", | |
| "ever_smoked:peto_exposed -22.4422 7.6e+04 -0.000 1.000 -1.49e+05 1.49e+05\n", | |
| "============================================================================================\n", | |
| "Odds Ratios\n", | |
| "======================\n", | |
| " 2.5% 97.5% OR\n", | |
| "Intercept 0.00 1,519,826.23 72.82\n", | |
| "age 0.85 1.08 0.96\n", | |
| "ever_smoked 0.08 55.92 2.06\n", | |
| "peto_exposed 0.00 inf 7,210,463,004.56\n", | |
| "ever_smoked:peto_exposed 0.00 inf 0.00\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/home/drcjar/anaconda3/envs/ipfjes/lib/python3.5/site-packages/statsmodels/base/model.py:568: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals\n", | |
| " \"Check mle_retvals\", ConvergenceWarning)\n", | |
| "/home/drcjar/anaconda3/envs/ipfjes/lib/python3.5/site-packages/ipykernel_launcher.py:10: RuntimeWarning: overflow encountered in exp\n", | |
| " # Remove the CWD from sys.path while we load stuff.\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "model = smf.logit('case ~ age + ever_smoked*peto_exposed', data=df[df['genotype'] == 2]) \n", | |
| "result = model.fit()\n", | |
| "print(result.summary())\n", | |
| "print (\"Odds Ratios\")\n", | |
| "print (\"======================\")\n", | |
| "params = result.params\n", | |
| "conf = result.conf_int()\n", | |
| "conf['OR'] = params\n", | |
| "conf.columns = ['2.5%', '97.5%', 'OR']\n", | |
| "print(np.exp(conf))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 60, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>case</th>\n", | |
| " <th>age</th>\n", | |
| " <th>ever_smoked</th>\n", | |
| " <th>peto_exposed</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>case</th>\n", | |
| " <td>1.00</td>\n", | |
| " <td>-0.15</td>\n", | |
| " <td>0.04</td>\n", | |
| " <td>0.14</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>age</th>\n", | |
| " <td>-0.15</td>\n", | |
| " <td>1.00</td>\n", | |
| " <td>-0.18</td>\n", | |
| " <td>-0.19</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>ever_smoked</th>\n", | |
| " <td>0.04</td>\n", | |
| " <td>-0.18</td>\n", | |
| " <td>1.00</td>\n", | |
| " <td>0.26</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>peto_exposed</th>\n", | |
| " <td>0.14</td>\n", | |
| " <td>-0.19</td>\n", | |
| " <td>0.26</td>\n", | |
| " <td>1.00</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " case age ever_smoked peto_exposed\n", | |
| "case 1.00 -0.15 0.04 0.14\n", | |
| "age -0.15 1.00 -0.18 -0.19\n", | |
| "ever_smoked 0.04 -0.18 1.00 0.26\n", | |
| "peto_exposed 0.14 -0.19 0.26 1.00" | |
| ] | |
| }, | |
| "execution_count": 60, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df[df['genotype'] == 2][['case', 'age', 'ever_smoked', 'peto_exposed']].corr()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.4" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment