5 A script to create and output satellite matchups from a SeaBASS file given an
6 OB.DAAC L2 (SST, SST4, IOP, or OC) satellite file and a valid SeaBASS file containing
7 lat, lon, date, and time as /field entries or fixed --slat and --slon coords or a fixed
8 box bounded by --slat, --slon, --elat, and --elon. NOTE: --slat and --slon will override
9 lat/lons in --seabass_file.
10 written by J.Scott on 2016/12/13 (joel.scott@nasa.gov)
19 from datetime
import datetime, timedelta
20 from statistics
import median
22 from math
import isnan
23 from collections
import OrderedDict
24 from seabass.SB_support
import readSB
26 parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,description=
'''\
27 This program create and output satellite matchups from a given SeaBASS file.
30 1) --sat_file= an OB.DAAC L2 (SST, SST4, IOP, or OC) satellite file
31 2) --seabass_file= a valid SeaBASS file with latitude, longitude, and date-time information as field entries.
33 Notes on OPTIONAL inputs:
34 1) --slat= --slon= must be used together and will override any lat/lons in --seabass_file
35 2) --elat= --elon= must be used together and with --slon= and --slat=
36 will override any lat/lons in --seabass_file
37 uses a lat/lon bounding box instead of --box_size=
40 1) the original SeaBASS data
42 2) collocated satellite products as additional columns appended to --seabass_file
45 mk_matchup.py --sat_file=[file name].nc --seabass_file=[file name].sb
46 mk_matchup.py --sat_file=[file name].nc --seabass_file=[file name].sb --slat=45.3 --slon=-157.4
47 mk_matchup.py --sat_file=[file name].nc --seabass_file=[file name].sb --slat=45.3 --elat=48.7 --slon=-157.4 --elon=-145.3
50 * This script is designed to work with files that have been properly
51 formatted according to SeaBASS guidelines (i.e. Files that passed FCHECK).
52 Some error checking is performed, but improperly formatted input files
53 could cause this script to error or behave unexpectedly. Files
54 downloaded from the SeaBASS database should already be properly formatted,
55 however, please email seabass@seabass.gsfc.nasa.gov and/or the contact listed
56 in the metadata header if you identify problems with specific files.
58 * It is always HIGHLY recommended that you check for and read any metadata
59 header comments and/or documentation accompanying data files. Information
60 from those sources could impact your analysis.
62 * Compatibility: This script was developed for Python 3.5.
66 parser.add_argument(
'--sat_file', nargs=
'+', type=argparse.FileType(
'r'), required=
True, help=
'''\
67 REQUIRED: input OB.DAAC Level-2 satellite netCDF file(s)
70 parser.add_argument(
'--seabass_file', nargs=
'+', type=argparse.FileType(
'r'), required=
True, help=
'''\
71 REQUIRED: input SeaBASS file(s)
72 Must be a valid SeaBASS file, passing FHCHECK with no errors.
73 Matched-up satellite variables will be appended as additional fields to the data matrix and relevant headers.
74 File must contain latitude and longitude and date-time expressed as FIELD entries.
77 parser.add_argument(
'--box_size', nargs=1, default=([5]), type=int, help=(
'''\
78 OPTIONAL: box size of the satellite data extract made around the in situ point
79 Valid values are odd numbers between 3 and 11, default = 5
82 parser.add_argument(
'--min_valid_sat_pix', nargs=1, default=([50.0]), type=float, help=(
'''\
83 OPTIONAL: percent minimum valid satellite pixels required to create an extract
84 Valid value: (0.0 - 100.0), default = 50.0
87 parser.add_argument(
'--max_time_diff', nargs=1, default=([3.0]), type=float, help=(
'''\
88 OPTIONAL: maximum time difference between satellite and in situ point
89 Valid value: decimal number of hours (0 - 36 hours), default = 3
92 parser.add_argument(
'--max_coeff_variation', nargs=1, default=([0.15]), type=float, help=(
'''\
93 OPTIONAL: maximum coefficient of variation of satellite pixels within the satellite extract
94 Valid value: (0.0 - 1.0), default = 0.15
97 parser.add_argument(
'--slat', nargs=1, type=float, help=(
'''\
98 OPTIONAL: Starting latitude, south-most boundary
99 If used with --seabass_file, will override lats in the file
100 Valid values: (-90,90N)
103 parser.add_argument(
'--elat', nargs=1, type=float, help=(
'''\
104 OPTIONAL: Ending latitude, north-most boundary
105 If used with --seabass_file and --slat, will override lats in the file
106 Valid values: (-90,90N)
109 parser.add_argument(
'--slon', nargs=1, type=float, help=(
'''\
110 OPTIONAL: Starting longitude, west-most boundary
111 If used with --seabass_file, will override lons in the file
112 Valid values: (-180,180E)
115 parser.add_argument(
'--elon', nargs=1, type=float, help=(
'''\
116 OPTIONAL: Ending longitude, east-most boundary
117 If used with --seabass_file and --slon, will override lons in the file
118 Valid values: (-180,180E)
121 parser.add_argument(
'--verbose', default=
False, action=
'store_true', help=(
'''\
122 OPTIONAL: Displays reason for failed matchup for each in situ target called.
125 parser.add_argument(
'--no_header_comment', default=
False, action=
'store_true', help=(
'''\
126 OPTIONAL: Flag to NOT append exclusion criteria to the OFILE header. Useful when running script repeatedly.
133 args=parser.parse_args()
137 if ((dict_args[
"box_size"][0] % 2) == 0)
or (dict_args[
"box_size"][0] > 11)
or (dict_args[
"box_size"][0] < 3):
138 parser.error(
"invalid --box_size specified, must be an ODD integer between 3 and 11")
140 if (dict_args[
"min_valid_sat_pix"][0] > 100.0)
or (dict_args[
"min_valid_sat_pix"][0] < 0.0):
141 parser.error(
"invalid --min_valid_sat_pix specified, must be a percentage expressed as a floating point number between 0.0 and 100.0")
143 if (dict_args[
"max_time_diff"][0] > 36)
or (dict_args[
"max_time_diff"][0] < 0):
144 parser.error(
"invalid --max_time_diff specified, must be a decimal number between 0 and 36")
146 twin_Hmin = -1 *
int(dict_args[
'max_time_diff'][0])
147 twin_Mmin = -60 * (dict_args[
'max_time_diff'][0] -
int(dict_args[
'max_time_diff'][0]))
148 twin_Hmax = 1 *
int(dict_args[
'max_time_diff'][0])
149 twin_Mmax = 60 * (dict_args[
'max_time_diff'][0] -
int(dict_args[
'max_time_diff'][0]))
151 if (dict_args[
"max_coeff_variation"][0] > 1.0)
or (dict_args[
"max_coeff_variation"][0] < 0.0):
152 parser.error(
"invalid --max_coeff_variation specified, must be an floating point number between 0.0 and 1.0")
154 for filein_sb
in dict_args[
'seabass_file']:
157 ds = readSB(filename=filein_sb.name,
159 mask_above_detection_limit=
False,
160 mask_below_detection_limit=
False,
163 ds.datetime = ds.fd_datetime()
165 parser.error(
'missing fields in SeaBASS file -- file must contain a valid FIELDS combination of date/year/month/day/sdy and time/hour/minute/second')
167 print(
'Looking for satellite/in situ match-ups for:',filein_sb.name)
169 for filein_sat
in dict_args[
'sat_file']:
171 if not re.search(
'\.nc', filein_sat.name.lower())
or \
172 not re.search(
'l2', filein_sat.name.lower()):
173 parser.error(
"invalid --sat_file specified, must be a Level-2 (L2) OB.DAAC netCDF (nc) file")
176 if re.search(
'SST', filein_sat.name):
177 flag_arg =
' ignore_flags=LAND\ NAVFAIL\ NAVWARN' + \
178 ' count_flags=LAND\ NAVFAIL'
180 flag_arg =
' ignore_flags=LAND\ HIGLINT\ HILT\ HISATZEN\ HISOLZEN\ STRAYLIGHT\ CLDICE\ ATMFAIL\ LOWLW\ FILTER\ NAVFAIL\ NAVWARN' + \
181 ' count_flags=LAND\ NAVFAIL'
183 print(
'Checking:',filein_sat.name)
187 for row,dt
in enumerate(ds.datetime):
190 tim_min = dt + timedelta(hours=twin_Hmin,minutes=twin_Mmin)
191 tim_max = dt + timedelta(hours=twin_Hmax,minutes=twin_Mmax)
194 if args.slat
and args.slon
and args.elat
and args.elon:
196 if abs(dict_args[
'slon'][0]) > 180.0
or abs(dict_args[
'elon'][0]) > 180.0:
197 parser.error(
'invalid longitude inputs: --slon and --elon MUST be between -180/180E deg. Received --slon = ' + \
198 str(dict_args[
'slon'][0]) +
' and --elon = ' +
str(dict_args[
'elon'][0]))
199 if abs(dict_args[
'slat'][0]) > 90.0
or abs(dict_args[
'elat'][0]) > 90.0:
200 parser.error(
'invalid latitude inputs: --slat and --elat MUST be between -90/90N deg. Received --slat = ' + \
201 str(dict_args[
'slat'][0]) +
' and --elat = ' +
str(dict_args[
'elat'][0]))
202 if dict_args[
'slat'][0] > dict_args[
'elat'][0]:
203 parser.error(
'invalid latitude inputs: --slat MUST be less than --elat and both MUST be between -90/90N deg. Received --slat = ' + \
204 str(dict_args[
'slat'][0]) +
' and --elat = ' +
str(dict_args[
'elat'][0]))
205 if dict_args[
'slon'][0] > dict_args[
'elon'][0]:
206 parser.error(
'invalid longitude inputs: --slon MUST be less than --elon and both MUST be between -180/180E deg. Received --slon = ' + \
207 str(dict_args[
'slon'][0]) +
' and --elon = ' +
str(dict_args[
'elon'][0]))
210 sys_call_str =
'val_extract' + \
211 ' ifile=' + filein_sat.name + \
212 ' slon=' +
str(dict_args[
'slon'][0]) + \
213 ' slat=' +
str(dict_args[
'slat'][0]) + \
214 ' elon=' +
str(dict_args[
'elon'][0]) + \
215 ' elat=' +
str(dict_args[
'elat'][0]) + \
217 ' variable_att=1' + flag_arg
223 pid = subprocess.run(sys_call_str, shell=
True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
224 if pid.returncode == 99:
225 if dict_args[
'verbose']:
226 print(
'No matchup: in situ target not in granule')
228 elif pid.returncode == 101
or pid.returncode == 102:
229 parser.error(
'val_extract failed -- only accepts Level-2 (L2) satellite files. ' + \
230 filein_sat.name +
' is not a valid L2 file')
231 elif pid.returncode != 0:
232 parser.error(
'val_extract failed -- verify that the val_extract binary is compiled and on your PATH and that ' + \
233 filein_sat.name +
' exists')
236 elif args.slat
and args.slon
and not args.elat
and not args.elon:
238 if abs(dict_args[
'slon'][0]) > 180.0:
239 parser.error(
'invalid longitude inputs: --slon MUST be between -180/180E deg. Received --slon = ' +
str(dict_args[
'slon'][0]))
240 if abs(dict_args[
'slat'][0]) > 90.0:
241 parser.error(
'invalid latitude inputs: --slat MUST be between -90/90N deg. Received --slat = ' +
str(dict_args[
'slat'][0]))
244 sys_call_str =
'val_extract' + \
245 ' ifile=' + filein_sat.name + \
246 ' slon=' +
str(dict_args[
'slon'][0]) + \
247 ' slat=' +
str(dict_args[
'slat'][0]) + \
249 ' variable_att=1' + \
250 ' boxsize=' +
str(dict_args[
'box_size'][0]) + flag_arg
256 pid = subprocess.run(sys_call_str, shell=
True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
257 if pid.returncode == 99:
258 if dict_args[
'verbose']:
259 print(
'No matchup: in situ target not in granule')
261 elif pid.returncode == 101
or pid.returncode == 102:
262 parser.error(
'val_extract failed -- only accepts Level-2 (L2) satellite files. ' + \
263 filein_sat.name +
' is not a valid L2 file')
264 elif pid.returncode != 0:
265 parser.error(
'val_extract failed -- verify that the val_extract binary is compiled and on your PATH and that ' + \
266 filein_sat.name +
' exists')
273 ds.lon = [
float(i)
for i
in ds.data[
'lon']]
274 ds.lat = [
float(i)
for i
in ds.data[
'lat']]
275 except Exception
as E:
277 parser.error(
'Missing fields in SeaBASS file. File must contain lat and lon as fields, or specify --slat and --slon.')
279 if isnan(ds.lat[row])
or isnan(ds.lon[row]):
282 if abs(ds.lon[row]) > 180.0:
283 parser.error(
'invalid longitude input: all longitude values in ' + filein_sb.name +
' MUST be between -180/180E deg.')
284 if abs(ds.lat[row]) > 90.0:
285 parser.error(
'invalid latitude input: all latitude values in ' + filein_sb.name +
' MUST be between -90/90N deg.')
288 sys_call_str =
'val_extract' + \
289 ' ifile=' + filein_sat.name + \
290 ' slon=' +
str(ds.lon[row]) + \
291 ' slat=' +
str(ds.lat[row]) + \
293 ' variable_att=1' + \
294 ' boxsize=' +
str(dict_args[
'box_size'][0]) + flag_arg
300 pid = subprocess.run(sys_call_str, shell=
True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
301 if pid.returncode == 99:
305 elif pid.returncode == 101
or pid.returncode == 102:
306 parser.error(
'val_extract failed -- only accepts Level-2 (L2) satellite files. ' + \
307 filein_sat.name +
' is not a valid L2 file')
308 elif pid.returncode != 0:
309 parser.error(
'val_extract failed -- verify that the val_extract binary is compiled and on your PATH and that ' + \
310 filein_sat.name +
' exists')
313 file_ls = OrderedDict()
318 fpix_ct = dict_args[
'box_size'][0]^2
327 file_del.append(filein_sat.name +
'.qc');
329 fileobj = open(filein_sat.name +
'.qc',
'r')
330 lines = fileobj.readlines()
332 newline = re.sub(
"[\r\n]+",
'',line)
333 if 'unflagged_pixel_count' in newline:
334 upix_ct =
int(newline.split(
'=')[1])
335 elif 'flagged_pixel_count' in newline:
336 fpix_ct =
int(newline.split(
'=')[1])
337 elif 'pixel_count' in newline:
338 pix_ct =
int(newline.split(
'=')[1])
339 elif 'time' in newline:
341 tims = re.search(
"(\d+)-(\d+)-(\d+)\s+(\d+):(\d+):(\d+)", newline.split(
'=')[1]);
342 tim_sat = datetime(year=
int(tims.group(1)), \
343 month=
int(tims.group(2)), \
344 day=
int(tims.group(3)), \
345 hour=
int(tims.group(4)), \
346 minute=
int(tims.group(5)), \
347 second=
int(tims.group(6)))
350 elif 'variables' in newline:
351 var_ls = newline.split(
'=')[1].split(
',')
353 file_del.append(filein_sat.name +
'.qc.' + var);
354 if 'l2_flags' in var
or \
355 'longitude' in var
or \
358 file_ls[var] = filein_sat.name +
'.qc.' + var
360 except Exception
as E:
362 parser.error(
' unable to open and read file ' + filein_sat.name +
'.qc')
365 file_del.append(filein_sat.name +
'.qc.global_attrs');
366 [inst, plat] =
readValEglobatt(filein_sat.name +
'.qc.global_attrs', parser)
374 if tim_sat > tim_max
or tim_sat < tim_min:
376 if dict_args[
'verbose']:
377 print(
'No matchup: failed MAX_TIME_DIFF, required =',dict_args[
"max_time_diff"][0],
'Exclusion level = 1, Matrix row =',row)
381 if (pix_ct - fpix_ct) != 0:
382 if upix_ct >= dict_args[
'box_size'][0]:
383 pix_thresh = 100.0 * (upix_ct / (pix_ct - fpix_ct))
384 if pix_thresh < dict_args[
'min_valid_sat_pix'][0]:
386 if dict_args[
'verbose']:
387 print(
'No matchup: failed MIN_VALID_SAT_PIX, required =',dict_args[
'min_valid_sat_pix'][0],
'found =',pix_thresh,
'Exclusion level = 4, Matrix row =',row)
391 if dict_args[
'verbose']:
392 print(
'No matchup: failed MIN_VALID_SAT_PIX, extracted satellite pixels less than box size, required =',dict_args[
'box_size'][0],
'found =',upix_ct,
'Exclusion level = 3, Matrix row =',row)
396 if dict_args[
'verbose']:
397 print(
'No matchup: failed MIN_VALID_SAT_PIX, division by zero when deriving pix_thresh due to required L2FLAG criteria, Exclusion level = 2, Data row =',row)
403 m = re.search(
"(rrs|aot)_([\d.]+)", var.lower())
405 if (
float(m.group(2)) > 405
and float(m.group(2)) < 570)
or (
float(m.group(2)) > 860
and float(m.group(2)) < 900):
406 if 'modis' in inst.lower():
408 if float(m.group(2)) == 469
or float(m.group(2)) == 555:
411 [fmean, fstdev, units] =
readValEfile(file_ls[var], parser)
412 if not fmean
or not fstdev:
414 if float(fmean) != 0:
422 if median(cvs) > dict_args[
'max_coeff_variation'][0]:
424 if dict_args[
'verbose']:
425 print(
'No matchup: failed MAX_COEF_OF_VARIATION, required =',dict_args[
'max_coeff_variation'][0],
'found =',median(cvs),
'Exclusion level = 5, Data row =',row)
431 L2file_varname = inst +
'_' + plat +
'_l2fname'
432 ds.addDataToOutput(row, L2file_varname.lower(),
'none', os.path.basename(filein_sat.name),
True)
435 tdiff_varname = inst +
'_' + plat +
'_tdiff'
437 ds.addDataToOutput(row, tdiff_varname.lower(),
'seconds', tdiff.total_seconds(),
True)
441 rrscv_varname = inst +
'_' + plat +
'_rrsaot_cv'
442 ds.addDataToOutput(row, rrscv_varname.lower(),
'unitless', median(cvs),
True)
446 if 'qual_sst' in var:
450 var_name = inst +
'_' + plat +
'_' + var.lower() +
'_mean'
451 ds.addDataToOutput(row, var_name.lower(),
'none', fmean,
True)
454 var_name = inst +
'_' + plat +
'_' + var.lower() +
'_max'
455 ds.addDataToOutput(row, var_name.lower(),
'none', fmax,
True)
458 [fmean, fstdev, units] =
readValEfile(file_ls[var], parser)
461 var_name = inst +
'_' + plat +
'_' + var.lower()
462 ds.addDataToOutput(row, var_name.lower(), units, fmean,
True)
465 var_name = inst +
'_' + plat +
'_' + var.lower() +
'_sd'
466 ds.addDataToOutput(row, var_name.lower(), units, fstdev,
True)
474 for line
in ds.comments:
475 if 'File ammended by OCSSW match-up maker script' in line:
479 if not dict_args[
'no_header_comment']
and not comment_flag:
480 ds.comments.append(
' ')
481 ds.comments.append(
' File ammended by OCSSW match-up maker script: mk_matchup.py on ' + datetime.now().strftime(
'%Y-%m-%d %H:%M:%S') +
'.')
482 ds.comments.append(
' WARNINGS: This script does NOT adjust in situ data to water-leaving values')
483 ds.comments.append(
' This script does NOT account for potential oversampling by the in situ data in time or space.')
484 ds.comments.append(
' If successive calls to this script are made for a single in situ file AND multiple-valid-overpasses exist,')
485 ds.comments.append(
' only the data from the last successive call will be saved to the output file. This may NOT be the best')
486 ds.comments.append(
' quality satellite data in space and time.')
487 ds.comments.append(
' Default exclusion criteria are obtained from: S.W. Bailey and P.J. Werdell, "A multi-sensor approach')
488 ds.comments.append(
' for the on-orbit validation of ocean color satellite data products", Rem. Sens. Environ. 102, 12-23 (2006).')
489 ds.comments.append(
' NOTE: The coefficient of variation is computed using all available Rrs between 405nm and 570nm and AOT between 860nm and 900nm,')
490 ds.comments.append(
' with the exception of MODIS Rrs land bands at 469nm and 555nm')
491 ds.comments.append(
' EXCLUSION CRITERIA applied to this satellite file:')
492 ds.comments.append(
' Box size of satellite extract = ' +
str(dict_args[
'box_size'][0]) +
' pixels by ' +
str(dict_args[
'box_size'][0]) +
' pixels')
493 ds.comments.append(
' Minimum percent valid satellite pixels = ' +
str(dict_args[
'min_valid_sat_pix'][0]))
494 ds.comments.append(
' Maximum solar zenith angle = 70 degrees')
495 ds.comments.append(
' Maximum satellite zenith angel = 60 degrees')
496 ds.comments.append(
' Maximum time difference between satellite and in situ = ' +
str(dict_args[
'max_time_diff'][0]) +
' hours')
497 ds.comments.append(
' Maximum coefficient of variation of satellite pixels = ' +
str(dict_args[
'max_coeff_variation'][0]))
498 ds.comments.append(
' EXCEPTIONS to Bailey and Werdell (2006):')
499 ds.comments.append(
' 1. User defined values given to mk_matchup.py will override recommended defaults.')
500 ds.comments.append(
' 2. The maximum allowed solar zenith angle used here is 70-deg vs the paper-recommended 75-deg.')
501 ds.comments.append(
' 3. Rrs and AOT data are only in the OC L2 satellite product suite.')
502 ds.comments.append(
' Other file_types (SST, SST4, IOP, etc) will not evaluate any maximum coefficient of variation threshhold.')
503 ds.comments.append(
' 4. For all SST file_types, the qual_sst_max or qual_sst_mean fields should be used to screen the sst value quality.')
504 ds.comments.append(
' The qual_sst value varies between 0 (best) and 4 (worst).')
505 ds.comments.append(
' The qual_sst_mean (qual_sst_max) is the mean (max) of the ' + \
506 str(dict_args[
'box_size'][0]) +
' by ' +
str(dict_args[
'box_size'][0]) +
' pixel satellite extract.')
507 ds.comments.append(
' ')
509 print(
'Satellite/in situ match-up(s) found.')
511 ds.writeSBfile(filein_sb.name)
514 print(
'No valid satellite match-ups found.')
527 print(
'WARNING: Cleanup of ',d,
' failed. Verify that you have read/write priviledges in the current working directory.')
536 fileobj = open(fname,
'r')
537 lines = fileobj.readlines()
539 newline = re.sub(
"[\r\n]+",
'',line)
540 if 'instrument=' in newline:
541 inst = newline.lower().split(
'=')[1]
542 elif 'platform=' in newline:
543 plat = newline.lower().split(
'=')[1]
545 except Exception
as E:
547 parser.error(
' unable to open and read file ' + fname)
557 fileobj = open(fname,
'r')
558 lines = fileobj.readlines()
562 newline = re.sub(
"[\r\n]+",
'',line)
563 if 'filtered_mean' in newline:
564 fmean = newline.split(
'=')[1]
565 elif 'filtered_stddev' in newline:
566 fstdev = newline.split(
'=')[1]
567 elif 'units' in newline:
568 units = re.sub(
'\s',
'_', newline.split(
'=')[1])
569 except Exception
as E:
571 parser.error(
' unable to open and read file ' + fname)
572 return(fmean, fstdev, units)
580 fileobj = open(fname,
'r')
581 lines = fileobj.readlines()
585 newline = re.sub(
"[\r\n]+",
'',line)
586 if 'mean' in newline
and not 'filtered_' in newline:
587 fmean = newline.split(
'=')[1]
588 elif 'max' in newline:
589 fmax = newline.split(
'=')[1]
590 except Exception
as E:
592 parser.error(
' unable to open and read file ' + fname)
596 if __name__ ==
"__main__":
main()