Last active
November 27, 2021 16:34
-
-
Save lucasastorian/d2d7cea6082ab3c5277f5300bd574201 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np

# Mean-Decrease-Impurity (MDI) feature importance, aggregated over CV folds.
#
# Names expected to be defined by the surrounding code (not visible here):
#   features - column labels, one per feature
#   cv       - splitter whose ``split()`` yields (train, test) index pairs
#   rf       - fitted-in-place ensemble exposing ``estimators_``; each member
#              exposes ``feature_importances_``
#   X, y, weights - training data, targets, and per-sample weights
#
# NOTE(review): ``cv.split()`` is called without arguments, which only works
# for a splitter that already holds its data; scikit-learn splitters need
# ``cv.split(X)`` -- confirm against the definition of ``cv``.
# NOTE(review): ``X[train]`` assumes positional row indexing (e.g. NumPy
# arrays); pandas objects would need ``.iloc[train]`` -- confirm.

fold_scores = []

for train, _ in cv.split():
    # The keyword is ``sample_weight`` (singular); the plural form raises
    # TypeError in scikit-learn estimators.
    rf.fit(X[train], y[train], sample_weight=weights[train])

    # One row per tree, one column per feature.
    mdi_scores = pd.DataFrame(
        [tree.feature_importances_ for tree in rf.estimators_],
        columns=features,
    )

    # An importance of exactly 0 is replaced by NaN so that it is skipped
    # (rather than averaged in) by the mean/std below.
    mdi_scores = mdi_scores.replace(0, np.nan)

    fold_scores.append(mdi_scores)

# Concatenate once after the loop: ``DataFrame.append`` was removed in
# pandas 2.0 and copied the whole accumulator on every fold.
if fold_scores:
    total_mdi_scores = pd.concat(fold_scores)
else:
    total_mdi_scores = pd.DataFrame(columns=features)

# Calculate the mean and the standard error of the mean for each feature,
# using the rows accumulated across ALL folds (not just the last fold).
mdi_scores = pd.concat(
    {
        'mean': total_mdi_scores.mean(),
        'std': total_mdi_scores.std() * total_mdi_scores.shape[0] ** -.5,
    },
    axis=1,
)

# Divide both columns by the sum of the per-feature means, so the 'mean'
# column sums to 1.
mdi_scores /= mdi_scores['mean'].sum()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment