Skip to content

Instantly share code, notes, and snippets.

@y-mitsui
Created May 24, 2017 16:56
Show Gist options
  • Select an option

  • Save y-mitsui/6ed54279860ee365b5d7f395855634a2 to your computer and use it in GitHub Desktop.

Select an option

Save y-mitsui/6ed54279860ee365b5d7f395855634a2 to your computer and use it in GitHub Desktop.
'''
Created on 2017/05/24
'''
from sklearn.grid_search import GridSearchCV
import numpy as np
from sklearn.cross_validation import KFold
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor, BaggingRegressor
from sklearn.linear_model import LassoCV
from sklearn import tree
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression, Ridge, ElasticNet
from ensemble_regressor import EnsembleRegressor
def original_cross_val_score(estimator, sample_X, sample_y):
    '''
    Cross-validate ``estimator`` and return the negated mean squared error.

    The sample indices are split with ``KFold(sample_X.shape[0], 5)``.  For
    every split the estimator is fitted on the training rows and asked to
    predict the held-out rows; all held-out predictions are pooled and a
    single mean squared error is computed over the pooled values.  The sign
    is flipped so that a larger return value means a better fit.

    Parameters:
        estimator: object exposing ``fit(X, y)`` and ``predict(X)``.  It is
            fitted in place, so on return it carries whatever state the
            final split's ``fit`` call left behind.
        sample_X: feature rows; must expose ``shape`` and accept indexing
            with the index arrays produced by ``KFold``.
        sample_y: targets; indexed the same way as ``sample_X`` and the
            indexed result must provide ``tolist()``.

    Returns:
        ``-mean_squared_error(true values, pooled predictions)``.
    '''
    true_y = []
    est_y = []
    for train_index, test_index in KFold(sample_X.shape[0], 5):
        estimator.fit(sample_X[train_index], sample_y[train_index])
        est_y.extend(estimator.predict(sample_X[test_index]).tolist())
        true_y.extend(sample_y[test_index].tolist())
    # scikit-learn documents the signature as (y_true, y_pred).  The value is
    # the same either way for MSE, but the call now follows the API contract.
    return -mean_squared_error(true_y, est_y)
class Predictor(object):
    '''
    Model selector: scores a fixed list of candidate regressors with
    cross-validation and keeps the best-scoring one for prediction.

    Attributes:
        estimators: list of candidate descriptions (see ``__init__``).
        best_estimator: set by ``fit``; the fitted object that achieved the
            highest score.  Not defined before ``fit`` has been called.
    '''

    def __init__(self):
        '''
        Build the list of candidates.

        Every entry is a dict with these keys:
            "context": the estimator instance.
            "name": label printed while fitting.
            "tuned_parameters" (optional): parameter grid handed to
                GridSearchCV.  When absent the estimator is scored as-is.
            "tuner" (optional): a ready-made search object used instead of
                GridSearchCV.  Only consulted when "tuned_parameters" is
                also present.
        '''
        self.estimators = [
            {"context": tree.DecisionTreeRegressor(),
             "tuned_parameters": [{'max_depth': range(1, 20, 2)}],
             "name": "decision tree"},
            {"context": RandomForestRegressor(n_estimators=50),
             "tuned_parameters": [{'max_depth': range(8, 20, 2)}],
             "name": "random forest"},
            {"context": LassoCV(normalize=True),
             "tuned_parameters": [{'eps': np.linspace(4e-2, 1, 20)}],
             "name": "lasso"},
            {"context": KNeighborsRegressor(),
             "tuned_parameters": [{'n_neighbors': range(1, 50, 10)}],
             "name": "KNN"},
            {"context": LinearRegression(),
             "tuned_parameters": [{'normalize': [True, False]}],
             "name": "Linear"},
            {"context": Ridge(),
             "tuned_parameters": [{'alpha': np.linspace(2e-2, 1e+2, 40)}],
             "name": "Ridge"},
            {"context": ElasticNet(),
             "tuned_parameters": [{'alpha': np.linspace(1e-6, 1e+2, 10),
                                   'l1_ratio': np.linspace(1e-6, 1, 10)}],
             "name": "ElasticNet"},
            #{"context": EnsembleRegressor(), "name": "EnsembleRegressor"},
            {"context": BaggingRegressor(tree.DecisionTreeRegressor(max_depth=12),
                                         max_samples=0.9, max_features=0.5,
                                         n_estimators=50),
             "name": "Bagging"},
            {"context": BaggingRegressor(ElasticNet(alpha=1e-3),
                                         max_samples=0.9, max_features=0.5,
                                         n_estimators=50),
             "name": "Elastic Bagging"},
        ]

    def fit(self, sample_X, sample_y):
        '''
        Score every candidate on the given data and remember the best one.

        Candidates with "tuned_parameters" are fitted through their "tuner"
        (or a 5-fold GridSearchCV scored by negative MSE) and contribute
        ``best_score_``.  The remaining candidates are scored by
        ``original_cross_val_score``.  Progress is printed for each one.

        Parameters:
            sample_X: training features, passed through to the estimators.
            sample_y: training targets, passed through to the estimators.

        Returns:
            self, so calls can be chained.
        '''
        best_score = float('-inf')
        # True when the current winner was scored by original_cross_val_score,
        # which leaves it fitted on a training split rather than on all data.
        refit_needed = False
        for estimator in self.estimators:
            print("==========%s========" % estimator['name'])
            has_grid = estimator.get('tuned_parameters') is not None
            if has_grid:
                context = estimator.get('tuner')
                if context is None:
                    context = GridSearchCV(estimator['context'],
                                           estimator['tuned_parameters'],
                                           cv=5, n_jobs=-1,
                                           scoring="neg_mean_squared_error")
                context.fit(sample_X, sample_y)
                print("best_params %s" % (context.best_params_,))
                cur_score = context.best_score_
            else:
                context = estimator["context"]
                cur_score = original_cross_val_score(context, sample_X, sample_y)
            print("cross_val_score %s" % (cur_score,))
            if best_score < cur_score:
                best_score = cur_score
                self.best_estimator = context
                refit_needed = not has_grid
        if refit_needed:
            # GridSearchCV refits its winner on the full data by default; do
            # the same for a plain estimator so predict() never relies on a
            # model trained on only part of the samples.
            self.best_estimator.fit(sample_X, sample_y)
        return self

    def predict(self, sample_X):
        '''
        Predict with the estimator selected by ``fit``.

        ``fit`` must have been called first; otherwise ``best_estimator``
        does not exist and an AttributeError is raised.
        '''
        return self.best_estimator.predict(sample_X)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment