from ..metrics      import performance
from ..utils        import find_wavelength, ignore_warnings
from ..meta         import get_sensor_bands
from ..transformers import TransformerPipeline, LogTransformer
from .utils         import get_benchmark_models, GlobalRandomManager

from collections import defaultdict as dd
from functools   import partial, update_wrapper

import numpy as np
import traceback
def get_models(wavelengths, sensor, product, debug=False, allow_opt=False, method=None, **kwargs):
    ''' Retrieve all benchmark functions from the appropriate product
        directory. Import each function with "model" in the function
        name, ensure any necessary parameters have a default value
        available, and test whether the function can be run with the
        given wavelengths. A template folder for new algorithms is
        available in the Benchmarks directory.
    '''
    valid  = {}
    sensor = sensor.split('-')[0]

    # Discover candidate functions via the .utils helper (the exact
    # arguments are elided in the source)
    models = get_benchmark_models(product, allow_opt, method)
    kwargs.update({
        # (additional entries elided in the source)
        'wavelengths' : wavelengths,
    })
    for name, model in models.items():
        sample_input = np.ones((1, len(wavelengths)))
        model_kwargs = dict(kwargs)

        # Optimizable models expose tunable parameters; give each a default of 1
        if getattr(model, 'has_default', False):
            model_kwargs.update( dict(zip(model.opt_vars, [1] * len(model.opt_vars))) )
        # Accept the model only if it runs cleanly on a sample input
        try:
            output = model(sample_input, **model_kwargs)
            assert(output is not None), f'Output for {name} is None'
            assert(not isinstance(output, dict)), f'"{product}" not found in the outputs of {name}'
            valid[name] = update_wrapper(partial(model, **kwargs), model)
        except Exception as e:
            if debug: print(f'Exception for function {name}: {e}\n{traceback.format_exc()}')
    return valid
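
# Usage sketch for get_models (the band list and Rrs values below are
# hypothetical; the call pattern mirrors how _bench_rs invokes it):
#
#     models = get_models([443, 490, 560, 665], 'OLCI', 'chl')
#     Rrs    = np.full((10, 4), 0.005)
#     chl    = {name: f(Rrs) for name, f in models.items()}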
def run_benchmarks(sensor, x_test, y_test=None, x_train=None, y_train=None, slices=None, args=None,
                   *, product='chl', bands=None, verbose=False,
                   return_rs=True, return_ml=False, return_opt=False,
                   kwargs_rs={}, kwargs_ml={}, kwargs_opt={}):
    def assert_same_features(a, b, label):
        assert(a is None or b is None or a.shape[1] == b.shape[1]), \
            f'Differing number of {label} features: {a.shape[1]} vs {b.shape[1]}'
    # Default to a full slice per requested product
    slices = slices or {p: slice(None) for p in np.atleast_1d(product)}

    assert_same_features(x_test, x_train, 'x')
    assert_same_features(y_test, y_train, 'y')
    assert_same_features(x_test, np.atleast_2d(bands), f'{sensor} band')

    if (return_ml or return_opt) and (x_train is None or y_train is None):
        raise Exception('Training data must be passed to use ML/Opt models')
    products_rs  = ['chl', 'tss', 'cdom', 'a', 'aph', 'ap', 'ag', 'adg', 'b', 'bbp']
    products_ml  = ['chl', 'tss', 'cdom']
    products_opt = ['chl', 'tss', 'cdom']
    bench = dd(dict)
    for product in slices:
        kwargs_default = {
            # Keys reconstructed from the _bench_* signatures; the source
            # elides part of this dict
            'sensor'  : sensor,
            'bands'   : bands,
            'product' : product,
            'args'    : args,
            'verbose' : verbose,
            'x_test'  : x_test,
            'x_train' : x_train,
            'y_train' : y_train[:, slices[product]] if y_train is not None else None,
            'y_test'  : y_test[:, slices[product]]  if y_test  is not None else None,
        }

        for bench_return, bench_products, bench_kwargs, bench_function in [
            (return_rs,  products_rs,  dict(kwargs_rs),  _bench_rs ),
            (return_ml,  products_ml,  dict(kwargs_ml),  _bench_ml ),
            (return_opt, products_opt, dict(kwargs_opt), _bench_opt),
        ]:
            if bench_return and product in bench_products:
                bench_kwargs.update(kwargs_default)
                bench[product].update( bench_function(**bench_kwargs) )
    return bench
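
# Usage sketch for run_benchmarks (hypothetical data shapes; get_sensor_bands
# is the module's own import, though its exact signature is assumed here):
#
#     bands = get_sensor_bands('OLCI')
#     ests  = run_benchmarks('OLCI', x_test, y_test, x_train, y_train,
#                            product='chl', bands=bands,
#                            return_rs=True, return_ml=True)
#     # ests['chl'] then maps each benchmark name to its per-sample estimates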
def _create_estimates(model, inputs, postprocess=None, preprocess=None, **kwargs):
    if postprocess is None: postprocess = lambda x: x
    if preprocess  is None: preprocess  = lambda x: x

    # preprocess may fit the model in place and return None; fall back to the model itself
    model     = preprocess(model) or model
    outputs   = getattr(model, 'predict', model)(inputs.copy())
    estimates = postprocess(outputs.flatten()[:, None])

    if kwargs.get('verbose', False) and kwargs.get('y_test', None) is not None:
        print( performance(model.__name__, kwargs['y_test'], estimates) )
    return estimates
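
# Sketch of the hook contract: preprocess may fit the model (returning it or
# None), and postprocess maps outputs back to the original target space. With
# a plain sklearn regressor (illustrative, not part of this module):
#
#     from sklearn.linear_model import LinearRegression
#     est = _create_estimates(LinearRegression(), x_test,
#                             preprocess=lambda m: m.fit(x_train, y_train))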
def _bench_rs(sensor, bands, x_test, product='chl', method=None, tol=15, allow_opt=False, **kwargs):
    # For chl, mask negative estimates with NaN (np.copyto returns None, so `or x` yields x)
    postps = lambda x: (np.copyto(x, np.nan, where=x < 0) or x) if product == 'chl' else x
    create = lambda f: _create_estimates(f, x_test, postps, **kwargs)
    models = get_models(bands, sensor, product, method=method, tol=tol, allow_opt=allow_opt)
    return {name: create(model) for name, model in models.items()}
def _bench_opt(sensor, bands, x_train, y_train, *args, **kwargs):
    # Fit each optimizable model to the training data before estimating
    preproc = lambda m: m.fit(x_train, y_train, bands)
    estims  = _bench_rs(sensor, bands, *args, allow_opt=True, preprocess=preproc, **kwargs)
    return {f'{k}_opt': v for k, v in estims.items()}
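
# Sketch: positional args after y_train are forwarded to _bench_rs, so x_test
# passes through, and each fitted model's estimates return under a '_opt'
# suffixed name (data names hypothetical):
#
#     ests = _bench_opt('OLCI', bands, x_train, y_train, x_test, product='chl')
#     # -> {'<model name>_opt': estimates, ...}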
def _bench_ml(sensor, x_train, y_train, x_test, *, x_other=None, verbose=False,
              seed=42, bagging=True, gridsearch=False, scale=True, methods=None,
              **kwargs):
    from sklearn.preprocessing   import RobustScaler, MinMaxScaler
    from sklearn.model_selection import GridSearchCV
    from sklearn.multioutput     import MultiOutputRegressor
    from sklearn.ensemble        import BaggingRegressor

    from .ML import models

    args = kwargs.get('args', None)  # was getattr(kwargs, ...), which always returned the default
    seed = getattr(args, 'seed', seed)

    gridsearch_kwargs = {'refit': False, 'scoring': 'neg_median_absolute_error'}
    bagging_kwargs    = {
        # (one entry elided in the source)
        'n_estimators' : getattr(args, 'n_rounds', 10),
        'max_samples'  : 0.75,
        'random_state' : seed,
    }

    if len(y_train.shape) == 1: y_train = y_train[:, None]
    # Drop samples with NaN/inf in either features or targets
    valid   = np.isfinite(x_train).all(-1) & np.isfinite(y_train).all(-1)
    x_train = x_train[valid]
    y_train = y_train[valid]

    # Scaler construction is elided in the source; a log + min-max pipeline
    # built from the imported transformers is assumed here
    x_scaler = TransformerPipeline([LogTransformer(), MinMaxScaler((-1, 1))])
    y_scaler = TransformerPipeline([LogTransformer(), MinMaxScaler((-1, 1))])
    x_scaler.fit(x_train)
    y_scaler.fit(y_train)
    x_test  = x_scaler.transform(x_test)
    x_train = x_scaler.transform(x_train)
    y_train = y_scaler.transform(y_train)
    preprocess  = lambda m: m.fit(x_train.copy(), y_train.copy())
    postprocess = None if not scale else y_scaler.inverse_transform

    if verbose and gridsearch:
        print('\nPerforming gridsearch...')

    if methods is None:
        methods = list(models.keys())
    methods = list(methods)  # copy, since matched names are removed below

    estim = {}
    other = {}
    for method, params in models.items():
        if method not in methods: continue
        methods.remove(method)

        params['grid']['random_state'] = params['default']['random_state'] = seed
        model_kwargs = params['default']
        model_class  = params['class']
        n_jobs       = 1 if method == 'MDN' else 3

        # Wrap multi-target problems in a MultiOutputRegressor
        if y_train.shape[1] > 1:
            model_class = lambda *args, **kwargs: MultiOutputRegressor(params['class'](*args, **kwargs))

        if gridsearch and method != 'SVM':
            model = GridSearchCV(model_class(), params['grid'], n_jobs=n_jobs, **gridsearch_kwargs)
            model.fit(x_train.copy(), y_train.copy())
            model_kwargs = model.best_params_
            if verbose: print(f'Best {method} params: {model_kwargs}')

        model = model_class(**model_kwargs)
        if bagging: model = BaggingRegressor(model, **bagging_kwargs)

        model.__name__ = method
        estim[method]  = _create_estimates(model, x_test, postprocess, preprocess, verbose=verbose, **kwargs)
        # Optionally generate estimates for a second feature set
        if x_other is not None:
            other[method] = _create_estimates(model, x_other, postprocess)

    if len(methods):
        print(f'Unknown ML benchmark methods requested: {methods}')
    return estim if x_other is None else (estim, other)
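
# Usage sketch for _bench_ml (hypothetical arrays; available method names come
# from the .ML `models` registry, e.g. 'MDN' and 'SVM' per the checks above):
#
#     ests = _bench_ml('OLCI', x_train, y_train, x_test, bagging=True, seed=42)
#     # -> {method name: estimates} for each requested registry entry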