NASA Ocean Color

ocssw V2022

 from .utils import ignore_warnings
 from scipy import stats
 import numpy as np 
 import functools 
  
  
 def validate_shape(func):
     ''' Decorator to flatten all function input arrays, and ensure shapes are the same '''
     @functools.wraps(func)
     def helper(*args, **kwargs):
         flat     = [a.flatten() if hasattr(a, 'flatten') else a for a in args]
         flat_shp = [a.shape for a in flat if hasattr(a, 'shape')]
         orig_shp = [a.shape for a in args if hasattr(a, 'shape')]
         assert(all(flat_shp[0] == s for s in flat_shp)), f'Shapes mismatch in {func.__name__}: {orig_shp}'
         return func(*flat, **kwargs)
     return helper  
  
  
 def only_finite(func):
     ''' Decorator to remove samples which are nan in any input array '''
     @validate_shape
     @functools.wraps(func)
     def helper(*args, **kwargs):
         stacked = np.vstack(args)
         valid   = np.all(np.isfinite(stacked), 0)
         assert(valid.sum()), f'No valid samples exist for {func.__name__} metric'
         return func(*stacked[:, valid], **kwargs)
     return helper 
  
  
 def only_positive(func):
     ''' Decorator to remove samples which are zero/negative in any input array '''
     @validate_shape
     @functools.wraps(func)  
     def helper(*args, **kwargs):
         stacked = np.vstack(args)
         valid   = np.all(stacked > 0, 0)
         assert(valid.sum()), f'No valid samples exist for {func.__name__} metric'
         return func(*stacked[:, valid], **kwargs)
     return helper 
  
  
 def label(name):
     ''' Label a function to aid in printing '''
     def wrapper(func):
         func.__name__ = name
         return ignore_warnings(func)
     return wrapper
  
  
 # ============================================================================
 ''' 
 When executing a function, decorator order starts with the 
 outermost decorator and works its way down the stack; e.g.
     @dec1
     @dec2
     def foo(): pass 
     def bar(): pass
 And then foo == dec1(dec2(bar)). So, foo will execute dec1, 
 then dec2, then the original function. 
  
 Below, in rmsle (for example), we have:
     rmsle = only_finite( only_positive( label(rmsle) ) ) 
 This means only_positive() will get the input arrays only
 after only_finite() removes any nan samples. As well, both
 only_positive() and only_finite() will have access to the 
 function __name__ assigned by label().
  
 For all functions below, y=true and y_hat=estimate
 '''
  
  
 @only_finite
 @label('RMSE')
 def rmse(y, y_hat):
     ''' Root Mean Squared Error '''
     return np.mean((y - y_hat) ** 2) ** .5
  
  
 @only_finite
 @only_positive
 @label('RMSLE')
 def rmsle(y, y_hat):
     ''' Root Mean Squared Logarithmic Error '''
     return np.mean(np.abs(np.log(y) - np.log(y_hat)) ** 2) ** 0.5 
  
  
 @only_finite
 @label('NRMSE')
 def nrmse(y, y_hat):
     ''' Normalized Root Mean Squared Error '''
     return ((y - y_hat) ** 2).mean() ** .5 / y.mean()
  
  
 @only_finite
 @label('MAE')
 def mae(y, y_hat):
     ''' Mean Absolute Error '''
     return np.mean(np.abs(y - y_hat))
  
  
 @only_finite
 @label('MAPE')
 def mape(y, y_hat):
     ''' Mean Absolute Percentage Error '''
     return 100 * np.mean(np.abs((y - y_hat) / y))
  
  
 @only_finite
 @label('<=0')
 def leqz(y, y_hat=None):
     ''' Less than or equal to zero (y_hat) '''
     if y_hat is None: y_hat = y
     return (y_hat <= 0).sum()
  
  
 @validate_shape
 @label('<=0|NaN')
 def leqznan(y, y_hat=None):
     ''' Less than or equal to zero (y_hat) '''
     if y_hat is None: y_hat = y
     return np.logical_or(np.isnan(y_hat), y_hat <= 0).sum()
  
  
 @only_finite
 @only_positive
 @label('MdSA')
 def mdsa(y, y_hat):
     ''' Median Symmetric Accuracy '''
     # https://agupubs.onlinelibrary.wiley.com/doi/full/10.1002/2017SW001669
     return 100 * (np.exp(np.median(np.abs(np.log(y_hat / y)))) - 1)
  
  
 @only_finite
 @only_positive
 @label('MSA')
 def msa(y, y_hat):
     ''' Mean Symmetric Accuracy '''
     # https://agupubs.onlinelibrary.wiley.com/doi/full/10.1002/2017SW001669
     return 100 * (np.exp(np.mean(np.abs(np.log(y_hat / y)))) - 1)
  
  
 @only_finite
 @only_positive
 @label('SSPB')
 def sspb(y, y_hat):
     ''' Symmetric Signed Percentage Bias '''
     # https://agupubs.onlinelibrary.wiley.com/doi/full/10.1002/2017SW001669
     M = np.median( np.log(y_hat / y) )
     return 100 * np.sign(M) * (np.exp(np.abs(M)) - 1)
  
  
 @only_finite
 @label('Bias')
 def bias(y, y_hat):
     ''' Mean Bias '''
     return np.mean(y_hat - y)
  
  
 @only_finite
 @only_positive
 @label('R^2')
 def r_squared(y, y_hat):
     ''' Logarithmic R^2 '''
     slope_, intercept_, r_value, p_value, std_err = stats.linregress(np.log10(y), np.log10(y_hat))
     return r_value**2
  
  
 @only_finite
 @only_positive
 @label('Slope')
 def slope(y, y_hat):
     ''' Logarithmic slope '''
     slope_, intercept_, r_value, p_value, std_err = stats.linregress(np.log10(y), np.log10(y_hat))
     return slope_
  
  
 @only_finite
 @only_positive
 @label('Intercept')
 def intercept(y, y_hat):
     ''' Locarithmic intercept '''
     slope_, intercept_, r_value, p_value, std_err = stats.linregress(np.log10(y), np.log10(y_hat))
     return intercept_
  
  
 @validate_shape
 @label('MWR')
 def mwr(y, y_hat, y_bench):
     ''' 
     Model Win Rate - Percent of samples in which model has a closer 
     estimate than the benchmark.
         y: true, y_hat: model, y_bench: benchmark 
     '''
     y_bench[y_bench < 0] = np.nan 
     y_hat[y_hat < 0] = np.nan 
     y[y < 0] = np.nan 
     valid = np.logical_and(np.isfinite(y_hat), np.isfinite(y_bench))
     diff1 = np.abs(y[valid] - y_hat[valid])
     diff2 = np.abs(y[valid] - y_bench[valid])
     stats = np.zeros(len(y))
     stats[valid]  = diff1 < diff2
     stats[~np.isfinite(y_bench)] = 1
     stats[~np.isfinite(y_hat)] = 0
     return stats.sum() / np.isfinite(y).sum()
  
  
 def performance(key, y, y_hat, metrics=[mdsa, sspb, slope, msa, rmsle, mae, leqznan], csv=False):
     ''' Return a string containing performance using various metrics. 
         y should be the true value, y_hat the estimated value. '''
     y     = y.flatten()
     y_hat = y_hat.flatten()
     try:
         if csv: return f'{key},'+','.join([f'{f.__name__}:{f(y, y_hat)}' for f in metrics])
         else:   return f'{key:>12} | '+'   '.join([f'{f.__name__}: {f(y, y_hat):>6.3f}' for f in metrics])
     except Exception as e: return f'{key:>12} | Exception: {e}'