# y-mitsui/Predictor.py

## Predictor.py
'''
Created on 2017/05/24

'''

from sklearn.grid_search import GridSearchCV
import numpy as np
from sklearn.cross_validation import KFold
from sklearn.metrics import mean_squared_error

from sklearn.ensemble import RandomForestRegressor, BaggingRegressor
from sklearn.linear_model import LassoCV
from sklearn import tree
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression, Ridge, ElasticNet
from ensemble_regressor import EnsembleRegressor

def original_cross_val_score(estimator, sample_X, sample_y):
    '''
    Score an estimator by the negated mean squared error of its
    out-of-fold predictions.

    The rows are split with the legacy ``sklearn.cross_validation.KFold``
    (called as ``KFold(n_samples, 5)``). For each split the estimator is
    fitted on the training rows and predicts the held-out rows; all
    held-out predictions are pooled and compared with the pooled targets
    in a single MSE computation.

    sample_X and sample_y must support ``.shape`` / index-array selection
    and ``.tolist()`` (e.g. numpy arrays).

    Note: ``estimator`` is fitted in place, so on return it holds the model
    trained on the last split's training rows only.

    Returns the negative MSE, so that a larger value means a better fit.
    '''
    n_samples = sample_X.shape[0]
    predicted, observed = [], []
    for fit_rows, held_out_rows in KFold(n_samples, 5):
        estimator.fit(sample_X[fit_rows], sample_y[fit_rows])
        predicted += estimator.predict(sample_X[held_out_rows]).tolist()
        observed += sample_y[held_out_rows].tolist()
    return -mean_squared_error(predicted, observed)

class Predictor(object):
    '''
    Pick the best of several candidate regressors by cross-validated
    (negated) mean squared error and predict with the winner.

    Each entry of ``self.estimators`` is a dict with the keys:
      * "name": label printed while fitting;
      * "context": the estimator instance;
      * "tuned_parameters" (optional): parameter grid to search over;
      * "tuner" (optional): a ready-made search object used instead of the
        default GridSearchCV when "tuned_parameters" is present.
    '''

    def __init__(self):
        '''
        Build the default list of candidate regressors.
        '''
        self.estimators = [
            {"context": tree.DecisionTreeRegressor(), "tuned_parameters": [{'max_depth': range(1, 20, 2)}], "name": "decision tree"},
            {"context": RandomForestRegressor(n_estimators=50), "tuned_parameters": [{'max_depth': range(8, 20, 2)}], "name": "random forest"},
            {"context": LassoCV(normalize=True), "tuned_parameters": [{'eps': np.linspace(4e-2, 1, 20)}], "name": "lasso"},
            {"context": KNeighborsRegressor(), "tuned_parameters": [{'n_neighbors': range(1, 50, 10)}], "name": "KNN"},
            {"context": LinearRegression(), "tuned_parameters": [{'normalize': [True, False]}], "name": "Linear"},
            {"context": Ridge(), "tuned_parameters": [{'alpha': np.linspace(2e-2, 1e+2, 40)}], "name": "Ridge"},
            {"context": ElasticNet(), "tuned_parameters": [{'alpha': np.linspace(1e-6, 1e+2, 10), 'l1_ratio':np.linspace(1e-6, 1, 10)}], "name": "ElasticNet"},
            #{"context": EnsembleRegressor(), "name": "EnsembleRegressor"},
            {"context": BaggingRegressor(tree.DecisionTreeRegressor(max_depth=12), max_samples=0.9, max_features=0.5, n_estimators=50), "name": "Bagging"},
            {"context": BaggingRegressor(ElasticNet(alpha=1e-3), max_samples=0.9, max_features=0.5, n_estimators=50), "name": "Elastic Bagging"},

        ]


    def fit(self, sample_X, sample_y):
        '''
        Score every candidate on (sample_X, sample_y) and keep the best one
        in ``self.best_estimator``.

        Candidates with "tuned_parameters" are fitted through their "tuner"
        (or a 5-fold GridSearchCV scored with "neg_mean_squared_error") and
        contribute ``best_score_``. The others are scored with
        original_cross_val_score. Scores are negated MSE, so the largest
        score wins; on ties the earlier candidate is kept.

        If ``self.estimators`` is empty, or no score compares greater than
        -inf, ``self.best_estimator`` is left unset.
        '''
        best_score = float('-inf')
        best_needs_refit = False
        for estimator in self.estimators:
            print("==========%s========" % (estimator['name'],))

            if estimator.get('tuned_parameters') is not None:
                context = estimator.get('tuner')
                if context is None:
                    context = GridSearchCV(estimator['context'], estimator['tuned_parameters'], cv=5, n_jobs=-1, scoring="neg_mean_squared_error")
                context.fit(sample_X, sample_y)
                print("best_params %s" % (context.best_params_,))
                cur_score = context.best_score_
                # The search object was fitted on the full sample above
                # (GridSearchCV refits its best model by default).
                needs_refit = False
            else:
                context = estimator["context"]
                cur_score = original_cross_val_score(context, sample_X, sample_y)
                # Cross-validation leaves the estimator fitted on the last
                # fold's training rows only.
                needs_refit = True

            print("cross_val_score %s" % (cur_score,))

            if best_score < cur_score:
                best_score = cur_score
                self.best_estimator = context
                best_needs_refit = needs_refit

        if best_needs_refit:
            # Train the winner on all the data before it is used by predict().
            self.best_estimator.fit(sample_X, sample_y)

    def predict(self, sample_X):
        '''
        Return the predictions of the estimator selected by fit().

        Raises AttributeError if fit() has not stored a best estimator yet.
        '''
        return self.best_estimator.predict(sample_X)
	'''
	Created on 2017/05/24

	'''

	from sklearn.grid_search import GridSearchCV
	import numpy as np
	from sklearn.cross_validation import KFold
	from sklearn.metrics import mean_squared_error

	from sklearn.ensemble import RandomForestRegressor, BaggingRegressor
	from sklearn.linear_model import LassoCV
	from sklearn import tree
	from sklearn.neighbors import KNeighborsRegressor
	from sklearn.linear_model import LinearRegression, Ridge, ElasticNet
	from ensemble_regressor import EnsembleRegressor

	def original_cross_val_score(estimator, sample_X, sample_y):
	    '''
	    Score an estimator by the negated mean squared error of its
	    out-of-fold predictions.

	    The rows are split with the legacy ``sklearn.cross_validation.KFold``
	    (called as ``KFold(n_samples, 5)``). For each split the estimator is
	    fitted on the training rows and predicts the held-out rows; all
	    held-out predictions are pooled and compared with the pooled targets
	    in a single MSE computation.

	    Note: ``estimator`` is fitted in place, so on return it holds the
	    model trained on the last split's training rows only.

	    Returns the negative MSE, so that a larger value means a better fit.
	    '''
	    est_y = []
	    true_y = []
	    for train_index, test_index in KFold(sample_X.shape[0], 5):
	        X_train2, X_test2 = sample_X[train_index], sample_X[test_index]
	        y_train2, y_test2 = sample_y[train_index], sample_y[test_index]
	        estimator.fit(X_train2, y_train2)
	        est_y.extend(estimator.predict(X_test2).tolist())
	        true_y.extend(y_test2.tolist())
	    return -mean_squared_error(est_y, true_y)

	class Predictor(object):
	    '''
	    Pick the best of several candidate regressors by cross-validated
	    (negated) mean squared error and predict with the winner.

	    Each entry of ``self.estimators`` is a dict with the keys:
	      * "name": label printed while fitting;
	      * "context": the estimator instance;
	      * "tuned_parameters" (optional): parameter grid to search over;
	      * "tuner" (optional): a ready-made search object used instead of
	        the default GridSearchCV when "tuned_parameters" is present.
	    '''

	    def __init__(self):
	        '''
	        Build the default list of candidate regressors.
	        '''
	        self.estimators = [
	            {"context": tree.DecisionTreeRegressor(), "tuned_parameters": [{'max_depth': range(1, 20, 2)}], "name": "decision tree"},
	            {"context": RandomForestRegressor(n_estimators=50), "tuned_parameters": [{'max_depth': range(8, 20, 2)}], "name": "random forest"},
	            {"context": LassoCV(normalize=True), "tuned_parameters": [{'eps': np.linspace(4e-2, 1, 20)}], "name": "lasso"},
	            {"context": KNeighborsRegressor(), "tuned_parameters": [{'n_neighbors': range(1, 50, 10)}], "name": "KNN"},
	            {"context": LinearRegression(), "tuned_parameters": [{'normalize': [True, False]}], "name": "Linear"},
	            {"context": Ridge(), "tuned_parameters": [{'alpha': np.linspace(2e-2, 1e+2, 40)}], "name": "Ridge"},
	            {"context": ElasticNet(), "tuned_parameters": [{'alpha': np.linspace(1e-6, 1e+2, 10), 'l1_ratio':np.linspace(1e-6, 1, 10)}], "name": "ElasticNet"},
	            #{"context": EnsembleRegressor(), "name": "EnsembleRegressor"},
	            {"context": BaggingRegressor(tree.DecisionTreeRegressor(max_depth=12), max_samples=0.9, max_features=0.5, n_estimators=50), "name": "Bagging"},
	            {"context": BaggingRegressor(ElasticNet(alpha=1e-3), max_samples=0.9, max_features=0.5, n_estimators=50), "name": "Elastic Bagging"},

	        ]


	    def fit(self, sample_X, sample_y):
	        '''
	        Score every candidate on (sample_X, sample_y) and keep the best
	        one in ``self.best_estimator``.

	        Candidates with "tuned_parameters" are fitted through their
	        "tuner" (or a 5-fold GridSearchCV scored with
	        "neg_mean_squared_error") and contribute ``best_score_``. The
	        others are scored with original_cross_val_score. Scores are
	        negated MSE, so the largest score wins; on ties the earlier
	        candidate is kept.

	        If ``self.estimators`` is empty, or no score compares greater
	        than -inf, ``self.best_estimator`` is left unset.
	        '''
	        best_score = float('-inf')
	        best_needs_refit = False
	        for estimator in self.estimators:
	            print("==========%s========" % (estimator['name'],))

	            if estimator.get('tuned_parameters') is not None:
	                context = estimator.get('tuner')
	                if context is None:
	                    context = GridSearchCV(estimator['context'], estimator['tuned_parameters'], cv=5, n_jobs=-1, scoring="neg_mean_squared_error")
	                context.fit(sample_X, sample_y)
	                print("best_params %s" % (context.best_params_,))
	                cur_score = context.best_score_
	                # The search object was fitted on the full sample above
	                # (GridSearchCV refits its best model by default).
	                needs_refit = False
	            else:
	                context = estimator["context"]
	                cur_score = original_cross_val_score(context, sample_X, sample_y)
	                # Cross-validation leaves the estimator fitted on the
	                # last fold's training rows only.
	                needs_refit = True

	            print("cross_val_score %s" % (cur_score,))

	            if best_score < cur_score:
	                best_score = cur_score
	                self.best_estimator = context
	                best_needs_refit = needs_refit

	        if best_needs_refit:
	            # Train the winner on all the data before predict() uses it.
	            self.best_estimator.fit(sample_X, sample_y)

	    def predict(self, sample_X):
	        '''
	        Return the predictions of the estimator selected by fit().

	        Raises AttributeError if fit() has not stored a best estimator yet.
	        '''
	        return self.best_estimator.predict(sample_X)
# No results found