Skip to content

Instantly share code, notes, and snippets.

@lucasastorian
Last active October 13, 2021 11:02
Show Gist options
  • Select an option

  • Save lucasastorian/e9d21fbafa4357b1f48797f457f55aec to your computer and use it in GitHub Desktop.

Select an option

Save lucasastorian/e9d21fbafa4357b1f48797f457f55aec to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
from sklearn.metrics import log_loss
# Permutation ("mean decrease accuracy") feature importance.
#
# For every cross-validation fold the classifier is fitted once, scored on the
# validation rows (negative log-loss), and then re-scored once per feature
# with that feature's validation column shuffled.  The relative drop in score
# caused by the shuffle is the importance of the feature.
#
# Names expected to be defined before this point (not visible in this file):
#   X, y, sample_weights -- indexable with .iloc by the fold positions
#   features             -- column labels of X to evaluate
#   cv                   -- splitter; assumed to follow the scikit-learn
#                           ``split(X)`` API -- TODO confirm for custom splitters
#   rf                   -- classifier exposing fit / predict_proba / classes_
baseline_scores = pd.Series(dtype=float)
permuted_scores = pd.DataFrame(columns=features, dtype=float)

for i, (train, val) in enumerate(cv.split(X=X)):
    X_train, y_train, w_train = X.iloc[train, :], y.iloc[train], sample_weights.iloc[train]
    X_val, y_val = X.iloc[val, :], y.iloc[val]

    fit = rf.fit(X=X_train, y=y_train, sample_weight=w_train.values)
    y_pred = fit.predict_proba(X_val)
    # .loc (not .iloc) so the initially empty Series can grow by label.
    baseline_scores.loc[i] = -log_loss(y_val, y_pred, labels=rf.classes_)

    for feature in features:
        X_val_permuted = X_val.copy(deep=True)
        # Assign a shuffled copy instead of shuffling ``.values`` in place:
        # ``.values`` is not guaranteed to be a writable view of the column.
        X_val_permuted[feature] = np.random.permutation(X_val_permuted[feature].values)
        y_pred = fit.predict_proba(X_val_permuted)
        permuted_scores.loc[i, feature] = -log_loss(y_val, y_pred, labels=rf.classes_)

# Enlargement via .loc can leave object-dtype columns; force numeric for mean/std.
permuted_scores = permuted_scores.astype(float)

# (baseline - permuted), aligned fold-by-fold on the row index, then expressed
# relative to the permuted log-loss.  Positive values mean that shuffling the
# feature made the score worse.
feature_importances = (-permuted_scores).add(baseline_scores, axis=0)
feature_importances = feature_importances / -permuted_scores

# Mean importance per feature and its standard error across folds.
feature_importances = pd.concat(
    {
        'mean': feature_importances.mean(),
        'std': feature_importances.std() * permuted_scores.shape[0] ** -.5,
    },
    axis=1,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment