OB.DAAC Logo
NASA Logo
Ocean Color Science Software

ocssw V2022
mk_matchup_sst.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 # coding: utf-8
3 
4 """
5 A Perl script to create and output satellite matchups from a SeaBASS file given an
6 OB.DAAC L2 satellite SST file and a valid SeaBASS file containing
7 lat, lon, date, and time as /field entries.
8 written by J.Scott on 2018/07/20 (joel.scott@nasa.gov)
9 """
10 
11 def main():
12 
13  import argparse
14  import os
15  import re
16  import subprocess
17  from datetime import datetime, timedelta
18  from copy import copy
19  from math import isnan
20  from collections import OrderedDict
21  from seabass.SB_support import readSB
22 
23  parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,description='''\
24  This program create and output satellite matchups from a given SeaBASS file.
25 
26  REQUIRED inputs:
27  1) --sat_file an OB.DAAC L2 satellite SST file(s)
28  2) --seabass_file a valid SeaBASS file(s) with latitude, longitude, and date-time information as field entries.
29 
30  Outputs:
31  1) the original SeaBASS data
32  AND
33  2) collocated satellite products as additional columns appended to --seabass_file
34 
35  Example usage call:
36  mk_matchup.py --sat_file=[file name].nc --seabass_file=[file name].sb
37 
38  Caveats:
39  * This script is designed to work with files that have been properly
40  formatted according to SeaBASS guidelines (i.e. Files that passed FCHECK).
41  Some error checking is performed, but improperly formatted input files
42  could cause this script to error or behave unexpectedly. Files
43  downloaded from the SeaBASS database should already be properly formatted,
44  however, please email seabass@seabass.gsfc.nasa.gov and/or the contact listed
45  in the metadata header if you identify problems with specific files.
46 
47  * It is always HIGHLY recommended that you check for and read any metadata
48  header comments and/or documentation accompanying data files. Information
49  from those sources could impact your analysis.
50 
51  * Compatibility: This script was developed for Python 3.6.
52 
53  License:
54  /*=====================================================================*/
55  NASA Goddard Space Flight Center (GSFC)
56  Software distribution policy for Public Domain Software
57 
58  The fd_matchup.py code is in the public domain, available without fee for
59  educational, research, non-commercial and commercial purposes. Users may
60  distribute this code to third parties provided that this statement appears
61  on all copies and that no charge is made for such copies.
62 
63  NASA GSFC MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THE SOFTWARE
64  FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED
65  WARRANTY. NEITHER NASA GSFC NOR THE U.S. GOVERNMENT SHALL BE LIABLE FOR
66  ANY DAMAGE SUFFERED BY THE USER OF THIS SOFTWARE.
67  /*=====================================================================*/
68  ''',add_help=True)
69 
70  parser.add_argument('--sat_file', nargs='+', type=argparse.FileType('r'), required=True, help='''\
71  REQUIRED: input OB.DAAC Level-2 satellite netCDF file
72  ''')
73 
74  parser.add_argument('--seabass_file', nargs='+', type=argparse.FileType('r'), required=True, help='''\
75  REQUIRED: input SeaBASS file
76  Must be a valid SeaBASS file, passing FHCHECK with no errors.
77  Matched-up satellite variables will be appended as additional fields to the data matrix and relevant headers.
78  File must contain latitude and longitude and date-time expressed as FIELD entries.
79  ''')
80 
81  parser.add_argument('--box_size', nargs=1, default=([5]), type=int, help=('''\
82  OPTIONAL: box size of the satellite data extract made around the in situ point
83  Valid values are odd numbers between 3 and 11, default = 5
84  '''))
85 
86  parser.add_argument('--min_valid_sat_pix', nargs=1, default=([50.0]), type=float, help=('''\
87  OPTIONAL: percent minimum valid satellite pixels required to create an extract
88  Valid value: (0.0 - 100.0), default = 50.0
89  '''))
90 
91  parser.add_argument('--max_time_diff', nargs=1, default=([0.5]), type=float, help=('''\
92  OPTIONAL: maximum time difference between satellite and in situ point
93  Valid value: decimal number of hours (0 - 36 hours), default = 3
94  '''))
95 
96  parser.add_argument('--verbose', default=False, action='store_true', help=('''\
97  OPTIONAL: Displays reason for failed matchup for each in situ target called.
98  '''))
99 
100  parser.add_argument('--no_header_comment', default=False, action='store_true', help=('''\
101  OPTIONAL: Flag to NOT append exclusion criteria to the OFILE header. Useful when running script repeatedly.
102  '''))
103 
104  args=parser.parse_args()
105  dict_args=vars(args)
106 
107  # input verification
108  if ((dict_args["box_size"][0] % 2) == 0) or (dict_args["box_size"][0] > 11) or (dict_args["box_size"][0] < 3):
109  parser.error("invalid --box_size specified, must be an ODD integer between 3 and 11")
110 
111  if (dict_args["min_valid_sat_pix"][0] > 100.0) or (dict_args["min_valid_sat_pix"][0] < 0.0):
112  parser.error("invalid --min_valid_sat_pix specified, must be a percentage expressed as a floating point number between 0.0 and 100.0")
113 
114  if (dict_args["max_time_diff"][0] > 36) or (dict_args["max_time_diff"][0] < 0):
115  parser.error("invalid --max_time_diff specified, must be a decimal number between 0 and 36")
116  else:
117  twin_Hmin = -1 * int(dict_args['max_time_diff'][0])
118  twin_Mmin = -60 * (dict_args['max_time_diff'][0] - int(dict_args['max_time_diff'][0]))
119  twin_Hmax = 1 * int(dict_args['max_time_diff'][0])
120  twin_Mmax = 60 * (dict_args['max_time_diff'][0] - int(dict_args['max_time_diff'][0]))
121 
122  for filein_sb in dict_args['seabass_file']:
123 
124  # read and verify SeaBASS file and required fields
125  if os.path.isfile(filein_sb.name):
126  ds = readSB(filename=filein_sb.name,
127  mask_missing=False,
128  mask_above_detection_limit=False,
129  mask_below_detection_limit=False,
130  no_warn=True)
131  else:
132  parser.error('ERROR: invalid --seabass_file specified; does ' + filein_sb.name + ' exist?')
133 
134  ds.datetime = ds.fd_datetime()
135  if not ds.datetime:
136  parser.error('missing fields in SeaBASS file -- file must contain a valid FIELDS combination of date/year/month/day/sdy and time/hour/minute/second')
137 
138  print('Looking for satellite/in situ match-ups for:',filein_sb.name)
139 
140  for filein_sat in dict_args['sat_file']:
141 
142  if not re.search('\.nc', filein_sat.name.lower()) or \
143  not re.search('l2', filein_sat.name.lower()) or \
144  not re.search('sst', filein_sat.name.lower()):
145  parser.error("invalid --sat_file specified, must be a Level-2 (L2) OB.DAAC SST netCDF (nc) file")
146  else:
147  #set l2_flags to check for OC/IOP versus SST/SST4 product suites
148  flag_arg = ' ignore_flags=LAND\ NAVFAIL\ NAVWARN' + \
149  ' count_flags=LAND\ NAVFAIL\ NAVWARN'
150 
151  print('Checking:',filein_sat.name)
152  write_flag = 0
153 
154  # loop through input SeaBASS file data rows
155  for row,dt in enumerate(ds.datetime):
156 
157  # create time range of satellite obs to extract
158  tim_min = dt + timedelta(hours=twin_Hmin,minutes=twin_Mmin)
159  tim_max = dt + timedelta(hours=twin_Hmax,minutes=twin_Mmax)
160 
161  # verify lat/lon inputs from file
162  try:
163  ds.lon = [float(i) for i in ds.data['lon']]
164  ds.lat = [float(i) for i in ds.data['lat']]
165  except Exception as E:
166  print(E)
167  parser.error('Missing fields in SeaBASS file. File must contain lat and lon as fields, or specify --slat and --slon.')
168 
169  if isnan(ds.lat[row]) or isnan(ds.lon[row]):
170  continue
171 
172  if abs(ds.lon[row]) > 180.0:
173  parser.error('invalid longitude input: all longitude values in ' + filein_sb.name + ' MUST be between -180/180E deg.')
174  if abs(ds.lat[row]) > 90.0:
175  parser.error('invalid latitude input: all latitude values in ' + filein_sb.name + ' MUST be between -90/90N deg.')
176 
177  # construct sys call to sstval_extract
178  sys_call_str = 'sstval_extract' + \
179  ' ifile=' + filein_sat.name + \
180  ' slon=' + str(ds.lon[row]) + \
181  ' slat=' + str(ds.lat[row]) + \
182  ' qual_check=qual_sst' + \
183  ' qual_check_distance=10' + \
184  ' global_att=1' + \
185  ' variable_att=1' + \
186  ' boxsize=' + str(dict_args['box_size'][0]) + flag_arg
187  # variable_att flag needed to extract units
188  # global_att flag needed to extract sensor/instrument names
189 
190  pid = subprocess.run(sys_call_str, shell=True, encoding='ascii', stdout=subprocess.PIPE, stderr=subprocess.PIPE)
191  if pid.returncode == 99:
192  #if dict_args['verbose']:
193  #print('No matchup: in situ target not in granule.')
194  continue #no valid matchup
195  elif pid.returncode == 101 or pid.returncode == 102:
196  parser.error('sstval_extract failed -- only accepts Level-2 (L2) satellite files. ' + \
197  filein_sat.name + ' is not a valid L2 file')
198  elif pid.returncode != 0:
199  print(pid.returncode)
200  print(pid.stdout)
201  print(pid.stderr)
202  parser.error('sstval_extract failed -- verify that the sstval_extract binary is compiled and on your PATH and that ' + \
203  filein_sat.name + ' exists')
204 
205  # define structures to keep track of sstval_extract's output files
206  file_ls = OrderedDict()
207  file_del = []
208  var_ls = []
209 
210  upix_ct = 0
211  fpix_ct = dict_args['box_size'][0]^2
212  pix_ct = 0
213 
214  tims = 0
215  tim_sat = 0
216 
217  cvs = []
218 
219  # parse the extract information
220  file_del.append(filein_sat.name + '.qc');
221  try:
222  fileobj = open(filein_sat.name + '.qc','r')
223  lines = fileobj.readlines()
224  for line in lines:
225  newline = re.sub("[\r\n]+",'',line)
226  if 'unflagged_pixel_count' in newline:
227  upix_ct = int(newline.split('=')[1])
228  elif 'flagged_pixel_count' in newline:
229  fpix_ct = int(newline.split('=')[1])
230  elif 'pixel_count' in newline:
231  pix_ct = int(newline.split('=')[1])
232  elif 'time' in newline:
233  try:
234  tims = re.search("(\d+)-(\d+)-(\d+)\s+(\d+):(\d+):(\d+)", newline.split('=')[1]);
235  tim_sat = datetime(year=int(tims.group(1)), \
236  month=int(tims.group(2)), \
237  day=int(tims.group(3)), \
238  hour=int(tims.group(4)), \
239  minute=int(tims.group(5)), \
240  second=int(tims.group(6)))
241  except:
242  continue
243  elif 'variables' in newline:
244  var_ls = newline.split('=')[1].split(',')
245  for var in var_ls:
246  file_del.append(filein_sat.name + '.qc.' + var);
247  if 'l2_flags' in var or \
248  'cntl_pt_rows' in var or \
249  'clat' in var or \
250  'clon' in var or \
251  'day' in var or \
252  'elat' in var or \
253  'elon' in var or \
254  'msec' in var or \
255  'slat' in var or \
256  'slon' in var or \
257  'year' in var:
258  continue
259  file_ls[var] = filein_sat.name + '.qc.' + var
260  fileobj.close()
261  except Exception as E:
262  print(E)
263  parser.error(' unable to open and read file ' + filein_sat.name + '.qc')
264 
265  # parse the satellite nc file information
266  file_del.append(filein_sat.name + '.qc.global_attrs');
267  [inst, plat] = readValEglobatt(filein_sat.name + '.qc.global_attrs', parser)
268  if not inst:
269  inst = 'na'
270  if not plat:
271  plat = 'na'
272 
273  # apply exclusion criteria
274  # compute and evaluate the max time diff test
275  if tim_sat > tim_max or tim_sat < tim_min:
276  clean_file_lis(file_del)
277  if dict_args['verbose']:
278  print('No matchup: failed MAX_TIME_DIFF, required =',dict_args["max_time_diff"][0],'Exclusion level = 1, Matrix row =',row)
279  continue #no valid matchup
280 
281  # compute and evaluate the min valid sat pix test
282  if (pix_ct - fpix_ct) != 0:
283  if upix_ct >= dict_args['box_size'][0]:
284  pix_thresh = 100.0 * (upix_ct / (pix_ct - fpix_ct))
285  if pix_thresh < dict_args['min_valid_sat_pix'][0]:
286  clean_file_lis(file_del)
287  if dict_args['verbose']:
288  print('No matchup: failed MIN_VALID_SAT_PIX, required =',dict_args['min_valid_sat_pix'][0],'found =',pix_thresh,'Exclusion level = 4, Matrix row =',row)
289  continue #no valid matchup
290  else:
291  clean_file_lis(file_del)
292  if dict_args['verbose']:
293  print('No matchup: failed MIN_VALID_SAT_PIX, extracted satellite pixels less than box size, required =',dict_args['box_size'][0],'found =',upix_ct,'Exclusion level = 3, Matrix row =',row)
294  continue #no valid matchup
295  else:
296  clean_file_lis(file_del)
297  if dict_args['verbose']:
298  print('No matchup: failed MIN_VALID_SAT_PIX, division by zero when deriving pix_thresh due to required L2FLAG criteria, Exclusion level = 2, Data row =',row)
299  continue #no valid matchup
300 
301  write_flag = 1 #only write out (write_flag == true), if matchups found
302 
303  #save L2_fname
304  L2file_varname = inst + '_' + plat + '_l2fname'
305  ds.addDataToOutput(row, L2file_varname.lower(), 'none', os.path.basename(filein_sat.name), True)
306 
307  #save tdiff
308  tdiff_varname = inst + '_' + plat + '_tdiff'
309  tdiff = tim_sat - dt
310  ds.addDataToOutput(row, tdiff_varname.lower(), 'seconds', tdiff.total_seconds(), True)
311 
312  # save extract-variables
313  for var in file_ls:
314 
315  #save extracted values for each var in file_lis
316  [fmax, fmin, fmean, fmedian, fstdev, centerval, units] = readValEfile(file_ls[var], parser)
317 
318  var_name = inst + '_' + plat + '_' + var.lower() + '_max'
319  ds.addDataToOutput(row, var_name.lower(), units, fmax, True)
320 
321  var_name = inst + '_' + plat + '_' + var.lower() + '_min'
322  ds.addDataToOutput(row, var_name.lower(), units, fmin, True)
323 
324  var_name = inst + '_' + plat + '_' + var.lower() + '_mean'
325  ds.addDataToOutput(row, var_name.lower(), units, fmean, True)
326 
327  var_name = inst + '_' + plat + '_' + var.lower() + '_median'
328  ds.addDataToOutput(row, var_name.lower(), units, fmedian, True)
329 
330  var_name = inst + '_' + plat + '_' + var.lower() + '_stddev'
331  ds.addDataToOutput(row, var_name.lower(), units, fstdev, True)
332 
333  var_name = inst + '_' + plat + '_' + var.lower() + '_center_pixel_value'
334  ds.addDataToOutput(row, var_name.lower(), units, centerval, True)
335 
336  clean_file_lis(file_del)
337 
338  if write_flag == 1:
339 
340  for line in ds.comments:
341  if 'File ammended by OCSSW match-up maker script' in line:
342  comment_flag = True
343  break
344 
345  else:
346  comment_flag = False
347 
348  if not dict_args['no_header_comment']:
349  ds.comments.append(' ')
350  ds.comments.append(' File ammended by OCSSW match-up maker script: mk_matchup_sst.py on ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S') + '.')
351  ds.comments.append(' WARNINGS: This script does NOT adjust in situ data to water-leaving values')
352  ds.comments.append(' This script does NOT account for potential oversampling by the in situ data in time or space.')
353  ds.comments.append(' If successive calls to this script are made for a single in situ file AND multiple-valid-overpasses exist,')
354  ds.comments.append(' only the data from the last successive call will be saved to the output file. This may NOT be the best')
355  ds.comments.append(' quality satellite data in space and time.')
356  ds.comments.append(' EXCLUSION CRITERIA applied to this satellite file:')
357  ds.comments.append(' Box size of satellite extract = ' + str(dict_args['box_size'][0]) + ' pixels by ' + str(dict_args['box_size'][0]) + ' pixels')
358  ds.comments.append(' Minimum percent valid satellite pixels = ' + str(dict_args['min_valid_sat_pix'][0]))
359  ds.comments.append(' Maximum time difference between satellite and in situ = ' + str(dict_args['max_time_diff'][0]) + ' hours')
360  ds.comments.append(' The qual_sst value varies between 0 (best) and 4 (worst).')
361  ds.comments.append(' The qual_sst_mean (qual_sst_max) is the mean (max) of the ' + \
362  str(dict_args['box_size'][0]) + ' by ' + str(dict_args['box_size'][0]) + ' pixel satellite extract.')
363  ds.comments.append(' ')
364 
365  print('Satellite/in situ match-up(s) found.')
366 
367  ds.writeSBfile(filein_sb.name)
368 
369  else:
370  print('No valid satellite match-ups found.')
371 
372  print(' ')
373 
374  return
375 
376 
377 def clean_file_lis(file_ls):
378  import os
379  for d in file_ls:
380  try:
381  os.remove(d)
382  except Exception as E:
383  print(E)
384  print('WARNING: Cleanup of ',d,' failed. Verify that you have read/write priviledges in the current working directory.')
385  return
386 
387 
388 def readValEglobatt(fname, parser):
389  import re
390  inst = ''
391  plat = ''
392  try:
393  fileobj = open(fname,'r')
394  lines = fileobj.readlines()
395  for line in lines:
396  newline = re.sub("[\r\n]+",'',line)
397  if 'instrument=' in newline:
398  inst = newline.lower().split('=')[1]
399  elif 'platform=' in newline:
400  plat = newline.lower().split('=')[1]
401  fileobj.close()
402  except Exception as E:
403  print(E)
404  parser.error(' unable to open and read file ' + fname)
405  return(inst, plat)
406 
407 
408 def readValEfile(fname, parser):
409 
410  import re
411 
412  missing = ''
413  fmax = ''
414  fmin = ''
415  fmean = ''
416  fmedian = ''
417  fstdev = ''
418  centerval = ''
419  units = ''
420 
421  try:
422 
423  fileobj = open(fname,'r')
424  lines = fileobj.readlines()
425  fileobj.close()
426 
427  for line in lines:
428 
429  newline = re.sub("[\r\n]+",'',line)
430 
431  if '_FillValue' in newline:
432  missing = newline.split('=')[1]
433 
434  for line in lines:
435 
436  newline = re.sub("[\r\n]+",'',line)
437 
438  if 'filtered_max' in newline:
439  fmax = newline.split('=')[1]
440  if fmax == missing:
441  fmax = ''
442 
443  elif 'filtered_min' in newline:
444  fmin = newline.split('=')[1]
445  if fmin == missing:
446  fmin = ''
447 
448  elif 'filtered_mean' in newline:
449  fmean = newline.split('=')[1]
450  if fmean == missing:
451  fmean = ''
452 
453  elif 'filtered_median' in newline:
454  fmedian = newline.split('=')[1]
455  if fmedian == missing:
456  fmedian = ''
457 
458  elif 'filtered_stddev' in newline:
459  fstdev = newline.split('=')[1]
460  if fstdev == missing:
461  fstdev = ''
462 
463  elif 'center_value' in newline:
464  centerval = newline.split('=')[1]
465  if centerval == missing:
466  centerval = ''
467 
468  elif 'units' in newline:
469  units = re.sub('\s', '_', newline.split('=')[1])
470 
471  except Exception as E:
472 
473  print(E)
474  parser.error(' unable to open and read file ' + fname)
475 
476  return(fmax, fmin, fmean, fmedian, fstdev, centerval, units)
477 
478 
479 if __name__ == "__main__": main()
def readValEfile(fname, parser)
def readValEglobatt(fname, parser)
const char * str
Definition: l1c_msi.cpp:35
def clean_file_lis(file_ls)
#define abs(a)
Definition: misc.h:90