Created
May 24, 2017 16:56
-
-
Save y-mitsui/6ed54279860ee365b5d7f395855634a2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ''' | |
| Created on 2017/05/24 | |
| ''' | |
| from sklearn.grid_search import GridSearchCV | |
| import numpy as np | |
| from sklearn.cross_validation import KFold | |
| from sklearn.metrics import mean_squared_error | |
| from sklearn.ensemble import RandomForestRegressor, BaggingRegressor | |
| from sklearn.linear_model import LassoCV | |
| from sklearn import tree | |
| from sklearn.neighbors import KNeighborsRegressor | |
| from sklearn.linear_model import LinearRegression, Ridge, ElasticNet | |
| from ensemble_regressor import EnsembleRegressor | |
def original_cross_val_score(estimator, sample_X, sample_y):
    '''
    Cross-validate ``estimator`` and return the negated mean squared error.

    The rows of ``sample_X`` / ``sample_y`` are split with
    ``KFold(sample_X.shape[0], 5)``.  For every split the estimator is fit on
    the training rows and asked to predict the held-out rows; predictions and
    targets from all splits are pooled and scored once.

    The result is negated so that a larger value means a better model.

    NOTE: ``estimator`` is fit in place, so on return it is left trained on
    the training rows of the last split only.

    :param estimator: object exposing ``fit(X, y)`` and ``predict(X)``.
    :param sample_X: feature array; must support ``.shape`` and indexing by
        an index array.
    :param sample_y: target array; must support indexing by an index array
        and ``.tolist()``.
    :return: ``-mean_squared_error`` over the pooled held-out predictions.
    '''
    predicted = []
    observed = []
    n_rows = sample_X.shape[0]
    for fit_rows, held_out_rows in KFold(n_rows, 5):
        fold_X_fit = sample_X[fit_rows]
        fold_X_eval = sample_X[held_out_rows]
        fold_y_fit = sample_y[fit_rows]
        fold_y_eval = sample_y[held_out_rows]

        estimator.fit(fold_X_fit, fold_y_fit)
        predicted += estimator.predict(fold_X_eval).tolist()
        observed += fold_y_eval.tolist()

    # Predictions are passed first, as in the original call; the squared
    # error is symmetric in its two arguments.
    pooled_mse = mean_squared_error(predicted, observed)
    return -pooled_mse
class Predictor(object):
    '''
    Pick the best of several candidate regressors by cross-validated MSE.

    ``self.estimators`` is a list of dicts, one per candidate, with the keys:

    * ``context`` -- the regressor instance to evaluate;
    * ``name`` -- label printed while fitting;
    * ``tuned_parameters`` (optional) -- parameter grid for ``GridSearchCV``;
    * ``tuner`` (optional) -- a ready-made search object used instead of
      ``GridSearchCV``.  It is only consulted when ``tuned_parameters`` is
      present and must expose ``fit``, ``best_params_`` and ``best_score_``.

    After ``fit`` the winning model is available as ``self.best_estimator``
    (``None`` when nothing has been selected).
    '''

    def __init__(self):
        '''
        Build the default list of candidate regressors and their grids.
        '''
        # Set up front so predict() can tell "never fit" from "fit".
        self.best_estimator = None
        self.estimators = [
            {
                "context": tree.DecisionTreeRegressor(),
                "tuned_parameters": [{'max_depth': range(1, 20, 2)}],
                "name": "decision tree",
            },
            {
                "context": RandomForestRegressor(n_estimators=50),
                "tuned_parameters": [{'max_depth': range(8, 20, 2)}],
                "name": "random forest",
            },
            {
                "context": LassoCV(normalize=True),
                "tuned_parameters": [{'eps': np.linspace(4e-2, 1, 20)}],
                "name": "lasso",
            },
            {
                "context": KNeighborsRegressor(),
                "tuned_parameters": [{'n_neighbors': range(1, 50, 10)}],
                "name": "KNN",
            },
            {
                "context": LinearRegression(),
                "tuned_parameters": [{'normalize': [True, False]}],
                "name": "Linear",
            },
            {
                "context": Ridge(),
                "tuned_parameters": [{'alpha': np.linspace(2e-2, 1e+2, 40)}],
                "name": "Ridge",
            },
            {
                "context": ElasticNet(),
                "tuned_parameters": [{'alpha': np.linspace(1e-6, 1e+2, 10), 'l1_ratio':np.linspace(1e-6, 1, 10)}],
                "name": "ElasticNet",
            },
            # Disabled candidate, kept for reference:
            #{"context": EnsembleRegressor(), "name": "EnsembleRegressor"},
            {
                "context": BaggingRegressor(tree.DecisionTreeRegressor(max_depth=12), max_samples=0.9, max_features=0.5, n_estimators=50),
                "name": "Bagging",
            },
            {
                "context": BaggingRegressor(ElasticNet(alpha=1e-3), max_samples=0.9, max_features=0.5, n_estimators=50),
                "name": "Elastic Bagging",
            },
        ]

    def fit(self, sample_X, sample_y):
        '''
        Score every candidate and keep the one with the highest score.

        Candidates with ``tuned_parameters`` are searched with their
        ``tuner`` (or a 5-fold ``GridSearchCV`` scored by
        ``neg_mean_squared_error``) and contribute ``best_score_``.  The
        others are scored with ``original_cross_val_score``.  Scores are
        negated MSE, so larger is better.

        :param sample_X: training features.
        :param sample_y: training targets.
        :return: ``self``.
        '''
        best_score = float('-inf')
        winner = None
        winner_needs_refit = False

        for estimator in self.estimators:
            print("==========%s========" % (estimator['name'],))
            if estimator.get('tuned_parameters') is not None:
                context = estimator.get('tuner')
                if context is None:
                    context = GridSearchCV(estimator['context'], estimator['tuned_parameters'], cv=5, n_jobs=-1, scoring="neg_mean_squared_error")
                context.fit(sample_X, sample_y)
                print("best_params %s" % (context.best_params_,))
                cur_score = context.best_score_
                # GridSearchCV refits the best parameters on all data by
                # default; a custom tuner is assumed to do the same
                # (NOTE(review): confirm for any tuner you plug in).
                needs_refit = False
            else:
                context = estimator["context"]
                cur_score = original_cross_val_score(context, sample_X, sample_y)
                # Manual CV leaves the model trained on the last fold's
                # training rows only.
                needs_refit = True
            print("cross_val_score %s" % (cur_score,))

            # A NaN score never compares greater, so such candidates are
            # skipped.
            if best_score < cur_score:
                best_score = cur_score
                winner = context
                winner_needs_refit = needs_refit

        if winner is not None and winner_needs_refit:
            # Train the selected model on the full sample before exposing it.
            winner.fit(sample_X, sample_y)

        # Assigned unconditionally so a fit without a winner does not leave a
        # stale model from an earlier call behind.
        self.best_estimator = winner
        return self

    def predict(self, sample_X):
        '''
        Predict with the estimator selected by ``fit``.

        :param sample_X: features to predict for.
        :return: whatever the selected estimator's ``predict`` returns.
        :raises AttributeError: if no estimator has been selected yet.
        '''
        best = getattr(self, 'best_estimator', None)
        if best is None:
            raise AttributeError(
                "Predictor has no best_estimator; call fit() with at least "
                "one scorable estimator before predict()"
            )
        return best.predict(sample_X)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment