Source code for flexcode.regression_models

import numpy as np

from .helpers import params_dict_optim_decision, params_name_format

# Optional dependency: xgboost is only needed by the XGBoost model below.
# Only the import sits inside ``try`` so that nothing else can be mistaken
# for a missing package.
try:
    import xgboost as xgb
except ImportError:
    XGBOOST_AVAILABLE = False
else:
    XGBOOST_AVAILABLE = True

# Optional dependency: scikit-learn. The submodules are imported explicitly
# because the models below reach them through attribute access
# (e.g. ``sklearn.multioutput.MultiOutputRegressor``).
try:
    import sklearn.ensemble
    import sklearn.linear_model
    import sklearn.model_selection
    import sklearn.multioutput
    import sklearn.neighbors
except ImportError:
    SKLEARN_AVAILABLE = False
else:
    SKLEARN_AVAILABLE = True
class FlexCodeRegression(object):
    """Common interface for the regression models defined in this module.

    The base class only records ``max_basis``; ``fit`` and ``predict`` are
    no-op placeholders (they return ``None``) that concrete models override.
    """

    def __init__(self, max_basis):
        """Store the basis size.

        :param max_basis: value kept on the instance as ``self.max_basis``
            (presumably the number of basis functions -- confirm with caller).
        """
        self.max_basis = max_basis

    def fit(self, x_train, z_basis, weight=None):
        """Placeholder; does nothing and returns ``None``.

        :param x_train: training covariates.
        :param z_basis: regression targets.
        :param weight: optional sample weights. Defaults to ``None`` so the
            signature lines up with the subclasses that make it optional.
        """
        pass

    def predict(self, x_new):
        """Placeholder; does nothing and returns ``None``.

        :param x_new: covariates to predict for.
        """
        pass
class NN(FlexCodeRegression):
    """Nearest-neighbors regression model.

    Wraps ``sklearn.neighbors.KNeighborsRegressor`` in a
    ``sklearn.multioutput.MultiOutputRegressor``. When the parameter
    dictionary calls for tuning, the estimator is not built up front and a
    5-fold grid search is run on the first call to ``fit``.
    """

    def __init__(self, max_basis, params, *args, **kwargs):
        """Build the model, or defer construction until ``fit`` tunes it.

        :param max_basis: forwarded to ``FlexCodeRegression.__init__``.
        :param params: dictionary of ``KNeighborsRegressor`` parameters; the
            legacy key ``"k"`` is accepted as an alias for ``"n_neighbors"``.
            The dictionary is not modified.
        :raises Exception: if scikit-learn could not be imported.
        """
        if not SKLEARN_AVAILABLE:
            raise Exception("NN requires scikit-learn to be installed")
        super(NN, self).__init__(max_basis)

        # Work on a shallow copy: renaming the key below must not leak into
        # the dictionary owned by the caller.
        params = dict(params)

        # Historically, we have used 'k' to indicate the number of neighbors,
        # so this just puts the right notation for KNeighborsRegressor
        if "k" in params:
            params["n_neighbors"] = params.pop("k")

        # opt_flag set -> self.params is later used as the GridSearchCV grid
        # (see cv_optim); otherwise it is used directly as estimator kwargs.
        params_opt, opt_flag = params_dict_optim_decision(params, multi_output=True)
        self.params = params_opt
        self.models = None if opt_flag else self._build_models()

    def _build_models(self):
        """Return a multi-output k-NN regressor configured from ``self.params``."""
        return sklearn.multioutput.MultiOutputRegressor(
            sklearn.neighbors.KNeighborsRegressor(**self.params), n_jobs=-1
        )

    def fit(self, x_train, z_basis, weight=None):
        """Fit the model, running the grid search first if it is still pending.

        :param x_train: training covariates.
        :param z_basis: multi-output regression targets.
        :param weight: must be ``None``; sample weights are not supported.
        :raises Exception: if ``weight`` is not ``None``.
        """
        if weight is not None:
            raise Exception("Weights not implemented for NN")

        if self.models is None:
            self.cv_optim(x_train, z_basis)

        self.models.fit(x_train, z_basis)

    def cv_optim(self, x_train, z_basis):
        """Select parameters by 5-fold grid search and rebuild ``self.models``.

        ``self.params`` is used as the search grid and is then replaced by the
        best parameters with the ``"estimator__"`` marker removed. The rebuilt
        model is left unfitted; ``fit`` trains it afterwards.
        """
        nn_obj = sklearn.multioutput.MultiOutputRegressor(
            sklearn.neighbors.KNeighborsRegressor(), n_jobs=-1
        )
        clf = sklearn.model_selection.GridSearchCV(
            nn_obj, self.params, cv=5, scoring="neg_mean_squared_error", verbose=2
        )
        clf.fit(x_train, z_basis)

        self.params = params_name_format(clf.best_params_, str_rem="estimator__")
        self.models = self._build_models()

    def predict(self, x_test):
        """Return the fitted model's predictions for ``x_test``."""
        coefs = self.models.predict(x_test)
        return coefs
class RandomForest(FlexCodeRegression):
    """Random-forest regression model.

    Uses ``sklearn.ensemble.RandomForestRegressor`` inside a
    ``sklearn.multioutput.MultiOutputRegressor``. If the parameter dictionary
    calls for tuning, the estimator is created by a 5-fold grid search the
    first time ``fit`` is called.
    """

    def __init__(self, max_basis, params, *args, **kwargs):
        """Create the model, or leave it unset until ``fit`` tunes it.

        :param max_basis: forwarded to ``FlexCodeRegression.__init__``.
        :param params: dictionary of ``RandomForestRegressor`` parameters.
        :raises Exception: if scikit-learn could not be imported.
        """
        if not SKLEARN_AVAILABLE:
            raise Exception("RandomForest requires scikit-learn to be installed")
        super(RandomForest, self).__init__(max_basis)

        self.params, needs_search = params_dict_optim_decision(params, multi_output=True)
        self.models = None
        if not needs_search:
            forest = sklearn.ensemble.RandomForestRegressor(**self.params)
            self.models = self._multi_output(forest)

    @staticmethod
    def _multi_output(estimator):
        """Wrap ``estimator`` so it is fitted once per target, on all cores."""
        return sklearn.multioutput.MultiOutputRegressor(estimator, n_jobs=-1)

    def fit(self, x_train, z_basis, weight=None):
        """Fit the forest, tuning it first when no model has been built yet.

        :param x_train: training covariates.
        :param z_basis: multi-output regression targets.
        :param weight: optional sample weights, passed on as ``sample_weight``.
        """
        search_pending = self.models is None
        if search_pending:
            self.cv_optim(x_train, z_basis, weight)
        self.models.fit(x_train, z_basis, sample_weight=weight)

    def cv_optim(self, x_train, z_basis, weight=None):
        """Grid-search the parameters (5 folds) and rebuild ``self.models``.

        ``self.params`` serves as the grid; afterwards it holds the winning
        parameters with the ``"estimator__"`` marker removed. The new model is
        returned to ``fit`` unfitted.
        """
        search = sklearn.model_selection.GridSearchCV(
            self._multi_output(sklearn.ensemble.RandomForestRegressor()),
            self.params,
            cv=5,
            scoring="neg_mean_squared_error",
            verbose=2,
        )
        search.fit(x_train, z_basis, sample_weight=weight)

        self.params = params_name_format(search.best_params_, str_rem="estimator__")
        tuned_forest = sklearn.ensemble.RandomForestRegressor(**self.params)
        self.models = self._multi_output(tuned_forest)

    def predict(self, x_test):
        """Return the fitted model's predictions for ``x_test``."""
        return self.models.predict(x_test)
class XGBoost(FlexCodeRegression):
    """Gradient-boosted trees regression model.

    Wraps ``xgb.XGBRegressor`` in a
    ``sklearn.multioutput.MultiOutputRegressor``. When the parameter
    dictionary calls for tuning, the estimator is built by a 5-fold grid
    search on the first call to ``fit``.
    """

    def __init__(self, max_basis, params, *args, **kwargs):
        """Build the model, or defer construction until ``fit`` tunes it.

        :param max_basis: forwarded to ``FlexCodeRegression.__init__``.
        :param params: dictionary of ``XGBRegressor`` parameters; the legacy
            key ``"eta"`` is accepted as an alias for ``"learning_rate"``.
            The dictionary is not modified.
        :raises Exception: if xgboost or scikit-learn could not be imported.
        """
        if not XGBOOST_AVAILABLE:
            raise Exception("XGBoost requires xgboost to be installed")
        # The multi-output wrapper and the grid search both come from
        # scikit-learn, so fail with a clear message rather than a NameError.
        if not SKLEARN_AVAILABLE:
            raise Exception("XGBoost requires scikit-learn to be installed")
        super(XGBoost, self).__init__(max_basis)

        # Work on a shallow copy: the alias handling and the defaults below
        # must not leak into the dictionary owned by the caller.
        params = dict(params)

        # Historically, people have used `eta` for `learning_rate` - taking
        # that into account
        if "eta" in params:
            params["learning_rate"] = params.pop("eta")

        # Also, set the default values if not passed
        # NOTE(review): recent xgboost releases deprecate `silent` (use
        # `verbosity`) and `reg:linear` (use `reg:squarederror`) -- confirm
        # against the xgboost versions this package supports.
        params.setdefault("max_depth", 6)
        params.setdefault("learning_rate", 0.3)
        params.setdefault("silent", 1)
        params.setdefault("objective", "reg:linear")

        # opt_flag set -> self.params is later used as the GridSearchCV grid
        # (see cv_optim); otherwise it is used directly as estimator kwargs.
        params_opt, opt_flag = params_dict_optim_decision(params, multi_output=True)
        self.params = params_opt
        self.models = None if opt_flag else self._build_models()

    def _build_models(self):
        """Return a multi-output XGBoost regressor configured from ``self.params``."""
        return sklearn.multioutput.MultiOutputRegressor(
            xgb.XGBRegressor(**self.params), n_jobs=-1
        )

    def fit(self, x_train, z_basis, weight=None):
        """Fit the model, running the grid search first if it is still pending.

        :param x_train: training covariates.
        :param z_basis: multi-output regression targets.
        :param weight: optional sample weights, passed on as ``sample_weight``.
        """
        if self.models is None:
            self.cv_optim(x_train, z_basis, weight)

        self.models.fit(x_train, z_basis, sample_weight=weight)

    def cv_optim(self, x_train, z_basis, weight=None):
        """Select parameters by 5-fold grid search and rebuild ``self.models``.

        ``self.params`` is used as the search grid and is then replaced by the
        best parameters with the ``"estimator__"`` marker removed. The rebuilt
        model is left unfitted; ``fit`` trains it afterwards.
        """
        xgb_obj = sklearn.multioutput.MultiOutputRegressor(xgb.XGBRegressor(), n_jobs=-1)
        clf = sklearn.model_selection.GridSearchCV(
            xgb_obj, self.params, cv=5, scoring="neg_mean_squared_error", verbose=2
        )
        clf.fit(x_train, z_basis, sample_weight=weight)

        self.params = params_name_format(clf.best_params_, str_rem="estimator__")
        self.models = self._build_models()

    def predict(self, x_test):
        """Return the fitted model's predictions for ``x_test``."""
        coefs = self.models.predict(x_test)
        return coefs
class Lasso(FlexCodeRegression):
    """Lasso / elastic-net regression model.

    Wraps ``sklearn.linear_model.ElasticNet`` in a
    ``sklearn.multioutput.MultiOutputRegressor``. With the default
    ``l1_ratio`` of 1.0 the penalty is pure L1. When the parameter dictionary
    calls for tuning, the estimator is built by a 5-fold grid search on the
    first call to ``fit``.
    """

    def __init__(self, max_basis, params, *args, **kwargs):
        """Build the model, or defer construction until ``fit`` tunes it.

        :param max_basis: forwarded to ``FlexCodeRegression.__init__``.
        :param params: dictionary of ``ElasticNet`` parameters; ``alpha`` and
            ``l1_ratio`` default to 1.0 when absent. The dictionary is not
            modified.
        :raises Exception: if scikit-learn could not be imported.
        """
        if not SKLEARN_AVAILABLE:
            raise Exception("Lasso requires scikit-learn to be installed")
        super(Lasso, self).__init__(max_basis)

        # Work on a shallow copy: the defaults below must not leak into the
        # dictionary owned by the caller.
        params = dict(params)

        # Also, set the default values if not passed
        params.setdefault("alpha", 1.0)
        params.setdefault("l1_ratio", 1.0)

        # opt_flag set -> self.params is later used as the GridSearchCV grid
        # (see cv_optim); otherwise it is used directly as estimator kwargs.
        params_opt, opt_flag = params_dict_optim_decision(params, multi_output=True)
        self.params = params_opt
        self.models = None if opt_flag else self._build_models()

    def _build_models(self):
        """Return a multi-output elastic net configured from ``self.params``."""
        return sklearn.multioutput.MultiOutputRegressor(
            sklearn.linear_model.ElasticNet(**self.params), n_jobs=-1
        )

    def fit(self, x_train, z_basis, weight=None):
        """Fit the model, running the grid search first if it is still pending.

        :param x_train: training covariates.
        :param z_basis: multi-output regression targets.
        :param weight: must be ``None``; sample weights are rejected.
        :raises ValueError: if ``weight`` is not ``None``.
        """
        if weight is not None:
            raise ValueError(
                "Weights are not supported in the ElasticNet/Lasso " "implementation in scikit-learn."
            )

        if self.models is None:
            self.cv_optim(x_train, z_basis)

        self.models.fit(x_train, z_basis)

    def cv_optim(self, x_train, z_basis):
        """Select parameters by 5-fold grid search and rebuild ``self.models``.

        ``self.params`` is used as the search grid and is then replaced by the
        best parameters with the ``"estimator__"`` marker removed. The rebuilt
        model is left unfitted; ``fit`` trains it afterwards.
        """
        lasso_obj = sklearn.multioutput.MultiOutputRegressor(
            sklearn.linear_model.ElasticNet(), n_jobs=-1
        )
        clf = sklearn.model_selection.GridSearchCV(
            lasso_obj, self.params, cv=5, scoring="neg_mean_squared_error", verbose=2
        )
        clf.fit(x_train, z_basis)

        self.params = params_name_format(clf.best_params_, str_rem="estimator__")
        self.models = self._build_models()

    def predict(self, x_test):
        """Return the fitted model's predictions for ``x_test``."""
        coefs = self.models.predict(x_test)
        return coefs
class CustomModel(FlexCodeRegression):
    """Regression model backed by a user-supplied estimator class.

    ``custom_model`` is called with keyword arguments (and with no arguments
    during the grid search) to create the estimator, which is then wrapped in
    a ``sklearn.multioutput.MultiOutputRegressor``. When the parameter
    dictionary calls for tuning, the estimator is built by a 5-fold grid
    search on the first call to ``fit``.
    """

    def __init__(self, max_basis, params, custom_model, *args, **kwargs):
        """Build the model, or defer construction until ``fit`` tunes it.

        :param max_basis: forwarded to ``FlexCodeRegression.__init__``.
        :param params: dictionary of keyword arguments for ``custom_model``.
        :param custom_model: callable returning an estimator; it must accept
            the keys of ``params`` as keyword arguments and also be callable
            with no arguments.
        :raises Exception: if scikit-learn could not be imported.
        """
        if not SKLEARN_AVAILABLE:
            raise Exception("Custom class requires scikit-learn to be installed")
        super(CustomModel, self).__init__(max_basis)

        # opt_flag set -> self.params is later used as the GridSearchCV grid
        # (see cv_optim); otherwise it is used directly as estimator kwargs.
        params_opt, opt_flag = params_dict_optim_decision(params, multi_output=True)
        self.params = params_opt
        self.base_model = custom_model
        self.models = None if opt_flag else self._build_models()

    def _build_models(self):
        """Return a multi-output wrapper around ``base_model(**self.params)``."""
        return sklearn.multioutput.MultiOutputRegressor(
            self.base_model(**self.params), n_jobs=-1
        )

    def fit(self, x_train, z_basis, weight=None):
        """Fit the model, running the grid search first if it is still pending.

        :param x_train: training covariates.
        :param z_basis: multi-output regression targets.
        :param weight: must be ``None``; sample weights are not implemented.
        :raises NotImplementedError: if ``weight`` is not ``None``.
        """
        # Given it's a custom class, work would need to be done
        # for sample weights - for now this is not implemented.
        # Compare against None instead of testing truthiness: an array of
        # weights has no unambiguous truth value, and falsy weights such as
        # 0 or an empty list must not slip through unnoticed.
        if weight is not None:
            raise NotImplementedError("Weights for custom class not implemented.")

        if self.models is None:
            self.cv_optim(x_train, z_basis)

        self.models.fit(x_train, z_basis)

    def cv_optim(self, x_train, z_basis):
        """Select parameters by 5-fold grid search and rebuild ``self.models``.

        ``self.params`` is used as the search grid and is then replaced by the
        best parameters with the ``"estimator__"`` marker removed. The rebuilt
        model is left unfitted; ``fit`` trains it afterwards.
        """
        custom_obj = sklearn.multioutput.MultiOutputRegressor(self.base_model(), n_jobs=-1)
        clf = sklearn.model_selection.GridSearchCV(
            custom_obj, self.params, cv=5, scoring="neg_mean_squared_error", verbose=2
        )
        clf.fit(x_train, z_basis)

        self.params = params_name_format(clf.best_params_, str_rem="estimator__")
        self.models = self._build_models()

    def predict(self, x_test):
        """Return the fitted model's predictions for ``x_test``."""
        coefs = self.models.predict(x_test)
        return coefs