""" Module for manipulating data from NASA GSFC SeaBASS files.
Author: Joel Scott, SAIC / NASA GSFC Ocean Ecology Lab

* This module is designed to work with files that have been properly
  formatted according to SeaBASS guidelines (i.e., files that have passed FCHECK).
  Some error checking is performed, but improperly formatted input files
  could cause this script to error or behave unexpectedly. Files
  downloaded from the SeaBASS database should already be properly formatted;
  however, please email seabass@seabass.gsfc.nasa.gov and/or the contact listed
  in the metadata header if you identify problems with specific files.

* It is always HIGHLY recommended that you check for and read any metadata
  header comments and/or documentation accompanying data files. Information
  from those sources could impact your analysis.

* Compatibility: This module was developed for Python 3.6 (tested with Python 3.6.3).
/*=====================================================================*/
NASA Goddard Space Flight Center (GSFC)
Software distribution policy for Public Domain Software

The readsb code is in the public domain, available without fee for
educational, research, non-commercial and commercial purposes. Users may
distribute this code to third parties provided that this statement appears
on all copies and that no charge is made for such copies.

NASA GSFC MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THE SOFTWARE
FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED
WARRANTY. NEITHER NASA GSFC NOR THE U.S. GOVERNMENT SHALL BE LIABLE FOR
ANY DAMAGE SUFFERED BY THE USER OF THIS SOFTWARE.
/*=====================================================================*/
"""
import re
from datetime import datetime
from collections import OrderedDict
""" is_number determines whether a given string represents a number; it does not handle complex numbers.
Returns True for int, float, or long values, otherwise False.
syntax: is_number(str)
"""

""" is_int determines whether a given string represents an integer, using int().
Returns True for int values, otherwise False.
"""
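# A minimal sketch of the two helpers described above; the bodies are assumed
# from the docstrings and are not necessarily the module's exact implementation.
def is_number(s):
    """ Return True if s can be parsed as a float (covers int and float strings). """
    try:
        float(s)
        return True
    except (TypeError, ValueError):
        return False

def is_int(s):
    """ Return True if s can be parsed as an int, using int(). """
    try:
        int(s)
        return True
    except (TypeError, ValueError):
        return False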
def doy2mndy(yr, doy):
    """ doy2mndy returns the month and day of month as integers,
    given a year and Julian day of year.
    syntax: [mn, dy] = doy2mndy(yr, doy)
    """
    from datetime import datetime

    dt = datetime.strptime('{:04d}{:03d}'.format(yr, doy), '%Y%j')

    return int(dt.strftime('%m')), int(dt.strftime('%d'))
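# Example: doy2mndy(2021, 45) returns (2, 14), i.e. 14 February 2021.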
94 """ Read an FCHECK-verified SeaBASS formatted data file.
96 Returned data structures:
97 .filename = name of data file
98 .headers = dictionary of header entry and value, keyed by header entry
99 .comments = list of strings containing the comment lines from the header information
100 .missing = fill value as a float used for missing data, read from header
101 .variables = dictionary of field name and unit, keyed by field name
102 .data = dictionary of data values, keyed by field name, returned as a list
103 .length = number of rows in the data matrix (i.e. the length of each list in data)
104 .bdl = fill value as a float used for below detection limit, read from header (empty if missing or N/A)
105 .adl = fill value as a float used for above detection limit, read from header (empty if missing or N/A)
107 Returned sub-functions:
108 .fd_datetime() - Converts date and time information from the file's data matrix to a Python
109 list of datetime objects
110 .addDataToOutput(irow,var_name,units,var_value) - Adds or appends single data point to data matrix given row index, field name,
111 field units, and data value, handling fields & units headers and missing values
112 .writeSBfile(ofile) - Writes headers, comments, and data into a SeaBASS file specified by ofile
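# A minimal usage sketch; the file name 'example.sb' and the field name 'chl' are
# hypothetical placeholders for any FCHECK-verified SeaBASS file and one of its /fields entries:
#
#   ds = readSB('example.sb', mask_missing=True, no_warn=True)
#   print(ds.headers['investigators'])  # header values, keyed by lowercase header name
#   print(ds.variables['chl'])          # -> ('chl', <unit string from /units>)
#   chl = ds.data['chl']                # one list per field, ds.length rows long
#   times = ds.fd_datetime()            # one datetime object per data row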
def __init__(self, filename, mask_missing=True, mask_above_detection_limit=True, mask_below_detection_limit=True, no_warn=False):
    """
    filename                   = name of SeaBASS input file (string)
    mask_missing               = flag to set missing values to NaN, default set to True
    mask_above_detection_limit = flag to set above_detection_limit values to NaN, default set to True
    mask_below_detection_limit = flag to set below_detection_limit values to NaN, default set to True
    no_warn                    = flag to suppress warnings, default set to False
    """
    # containers for the structures described in the class docstring
    self.filename = filename
    self.headers = OrderedDict()
    self.comments = []
    self.variables = OrderedDict()
    self.data = OrderedDict()
    self.missing = ''
    self.adl = ''
    self.bdl = ''
    self.pi = ''
    _vars = []
    _units = []
    delim = ''
    end_header = False

    try:
        fileobj = open(self.filename, 'r')

    except Exception as e:
        raise Exception('Unable to open file for reading: {:}. Error: {:}'.format(self.filename, e))

    try:
        lines = fileobj.readlines()
        fileobj.close()

    except Exception as e:
        raise Exception('Unable to read data from file: {:}. Error: {:}'.format(self.filename, e))

    """ Remove any/all newline and carriage return characters """
    lines = [re.sub("[\r\n]+", '', line).strip() for line in lines]
162 """ Extract header """
164 and not '/begin_header' in line.lower() \
165 and not '/end_header' in line.lower() \
168 [h,v] = line.split(
'=', 1)
175 raise Exception(
'Unable to parse header key/value pair. Is this a SeaBASS file: {:}\nLine: {:}'.format(self.
filename,line))
178 """ Extract fields """
179 if '/fields=' in line.lower()
and not '!' in line:
181 _vars = line.split(
'=', 1)[1].lower().split(
',')
185 except Exception
as e:
186 raise Exception(
'Unable to parse /fields in file: {:}. Error: {:}. In line: {:}'.format(self.
filename,e,line))
189 """ Extract units """
190 if '/units=' in line.lower()
and not '!' in line:
191 _units = line.split(
'=', 1)[1].lower().split(
',')
193 """ Extract missing val """
194 if '/missing=' in line.lower()
and not '!' in line:
198 except Exception
as e:
199 raise Exception(
'Unable to parse /missing value in file: {:}. Error: {:}. In line: {:}'.format(self.
filename,e,line))
202 """ Extract optical depth warning """
203 if '/data_use_warning=' in line.lower()
and not '!' in line:
206 """ Extract below detection limit """
207 if '/below_detection_limit=' in line.lower()
and not '!' in line:
209 self.
bdl =
float(line.split(
'=', 1)[1])
211 except Exception
as e:
212 raise Exception(
'Unable to parse /below_detection_limit value in file: {:}. Error: {:}. In line: {:}'.format(self.
filename,e,line))
215 """ Extract below detection limit """
216 if '/above_detection_limit=' in line.lower()
and not '!' in line:
218 self.
adl =
float(line.split(
'=', 1)[1])
220 except Exception
as e:
221 raise Exception(
'Unable to parse /above_detection_limit value in file: {:}. Error: {:}. In line: {:}'.format(self.
filename,e,line))
        if '/investigators=' in line.lower() and not '!' in line:
            self.pi = line.split('=', 1)[1].split(',', 1)[0]
228 """ Extract delimiter """
229 if '/delimiter=' in line.lower()
and not '!' in line:
230 if 'comma' in line.lower():
232 elif 'space' in line.lower():
234 elif 'tab' in line.lower():
237 raise Exception(
'Invalid delimiter detected in file: {:}. In line: {:}'.format(self.
filename,line))
240 """ Extract comments, but not history of metadata changes """
241 if '!' in line
and not '!/' in line:
244 """ Check for required SeaBASS file header elements before parsing data matrix """
245 if '/end_header' in line.lower():
247 raise Exception(
'No valid /delimiter detected in file: {:}'.format(self.
filename))
251 raise Exception(
'No valid /missing value detected in file: {:}'.format(self.
filename))
255 raise Exception(
'No /fields detected in file: {:}'.format(self.
filename))
            if 'data_use_warning' in self.headers and not no_warn:
                print('Warning: data_use_warning header is present in file: {:}. This file contains measurements collected under unique conditions. Use with caution and consult headers, file comments, and documentation for additional information. Use no_warn=True to suppress this message.'.format(self.filename))

            if mask_above_detection_limit and not no_warn:
                if self.adl == '':
                    print('Warning: No above_detection_limit in file: {:}. Unable to mask values as NaNs. Use no_warn=True to suppress this message.'.format(self.filename))

            if mask_below_detection_limit and not no_warn:
                if self.bdl == '':
                    print('Warning: No below_detection_limit in file: {:}. Unable to mask values as NaNs. Use no_warn=True to suppress this message.'.format(self.filename))

            end_header = True
            continue
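        # For reference, a minimal header block that satisfies these checks might
        # look like the following (field names and values are purely illustrative):
        #
        #   /begin_header
        #   /investigators=Jane_Doe
        #   /delimiter=comma
        #   /missing=-9999
        #   /fields=date,time,lat,lon,chl
        #   /units=yyyymmdd,hh:mm:ss,degrees,degrees,mg/m^3
        #   /end_header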
272 """ Extract data after headers """
273 if end_header
and line:
275 for var,dat
in zip(_vars,re.split(delim,line)):
282 if mask_above_detection_limit
and self.
adl !=
'':
286 if mask_below_detection_limit
and self.
bdl !=
'':
290 if mask_missing
and dat == self.
missing:
293 self.
data[var].append(dat)
297 except Exception
as e:
298 raise Exception(
'Unable to parse data from line in file: {:}. Error: {:}. In line: {:}'.format(self.
filename,e,line))
    """ Pair each field name with its units """
    if _units:
        self.variables = OrderedDict(zip(_vars, zip(_vars, _units)))

    else:
        if not no_warn:
            print('Warning: No valid units were detected in file: {:}. Use no_warn=True to suppress this message.'.format(self.filename))

        self.variables = OrderedDict(zip(_vars, _vars))

    self.length = len(self.data[_vars[0]]) if self.data else 0
315 """ Convert date and time information from the file's data to a Python list of datetime objects.
317 Returned data structure:
318 dt = a list of Python datetime objects
320 Looks for these fields in this order:
322 year/month/day/hour/minute/second,
324 date/hour/minute/second,
326 year/sdy/hour/minute/second,
328 year/month/day/hour/minute,
330 year/sdy/hour/minute,
337 start_date/start_time (headers),
339 in the SELF Python structure.
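    # Example, continuing the usage sketch above (file and field names are hypothetical):
    #   ds = readSB('example.sb', no_warn=True)
    #   pairs = list(zip(ds.fd_datetime(), ds.data['chl']))  # (datetime, value) per row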
    dt = []

    if not self.data:
        raise ValueError('readSB.data structure is missing for file: {:}'.format(self.filename))
    if 'date' in self.data and \
            'time' in self.data:
        try:
            for d, t in zip([str(de) for de in self.data['date']], self.data['time']):
                da = re.search("(\d{4})(\d{2})(\d{2})", d)
                ti = re.search("(\d{1,2})\:(\d{2})\:(\d{2})", t)
                dt.append(datetime(int(da.group(1)), int(da.group(2)), int(da.group(3)),
                                   int(ti.group(1)), int(ti.group(2)), int(ti.group(3))))

        except:
            raise ValueError('date/time fields not formatted correctly; unable to parse in file: {:}'.format(self.filename))

    elif 'year' in self.data and \
            'month' in self.data and \
            'day' in self.data and \
            'hour' in self.data and \
            'minute' in self.data and \
            'second' in self.data:
        try:
            for y, m, d, h, mn, s in zip(self.data['year'], self.data['month'], self.data['day'],
                                         self.data['hour'], self.data['minute'], self.data['second']):
                dt.append(datetime(int(y), int(m), int(d), int(h), int(mn), int(s)))

        except:
            raise ValueError('year/month/day/hour/minute/second fields not formatted correctly; unable to parse in file: {:}'.format(self.filename))

    elif 'year' in self.data and \
            'month' in self.data and \
            'day' in self.data and \
            'time' in self.data:
        try:
            for y, m, d, t in zip(self.data['year'], self.data['month'], self.data['day'], self.data['time']):
                ti = re.search("(\d{1,2})\:(\d{2})\:(\d{2})", t)
                dt.append(datetime(int(y), int(m), int(d),
                                   int(ti.group(1)), int(ti.group(2)), int(ti.group(3))))

        except:
            raise ValueError('year/month/day/time fields not formatted correctly; unable to parse in file: {:}'.format(self.filename))
    elif 'date' in self.data and \
            'hour' in self.data and \
            'minute' in self.data and \
            'second' in self.data:
        try:
            for d, h, mn, s in zip([str(de) for de in self.data['date']], self.data['hour'],
                                   self.data['minute'], self.data['second']):
                da = re.search("(\d{4})(\d{2})(\d{2})", d)
                dt.append(datetime(int(da.group(1)), int(da.group(2)), int(da.group(3)),
                                   int(h), int(mn), int(s)))

        except:
            raise ValueError('date/hour/minute/second fields not formatted correctly; unable to parse in file: {:}'.format(self.filename))

    elif 'date_time' in self.data:
        try:
            for i in self.data['date_time']:
                da = re.search("(\d{4})-(\d{2})-(\d{2})\s(\d{1,2})\:(\d{2})\:(\d{2})", i)
                dt.append(datetime(int(da.group(1)), int(da.group(2)), int(da.group(3)),
                                   int(da.group(4)), int(da.group(5)), int(da.group(6))))

        except:
            raise ValueError('date_time field not formatted correctly; unable to parse in file: {:}'.format(self.filename))
    elif 'year' in self.data and \
            'sdy' in self.data and \
            'hour' in self.data and \
            'minute' in self.data and \
            'second' in self.data:
        try:
            for y, sdy, h, mn, s in zip(self.data['year'], self.data['sdy'], self.data['hour'],
                                        self.data['minute'], self.data['second']):
                [mo, dom] = doy2mndy(int(y), int(sdy))  # convert sdy (day of year) to month/day
                dt.append(datetime(int(y), mo, dom, int(h), int(mn), int(s)))

        except:
            raise ValueError('year/sdy/hour/minute/second fields not formatted correctly; unable to parse in file: {:}'.format(self.filename))

    elif 'year' in self.data and \
            'sdy' in self.data and \
            'time' in self.data:
        try:
            for y, sdy, t in zip(self.data['year'], self.data['sdy'], self.data['time']):
                ti = re.search("(\d{1,2})\:(\d{2})\:(\d{2})", t)
                [mo, dom] = doy2mndy(int(y), int(sdy))
                dt.append(datetime(int(y), mo, dom,
                                   int(ti.group(1)), int(ti.group(2)), int(ti.group(3))))

        except:
            raise ValueError('year/sdy/time fields not formatted correctly; unable to parse in file: {:}'.format(self.filename))

    elif 'start_date' in self.headers and \
            'time' in self.data:
        try:
            da = re.search("(\d{4})(\d{2})(\d{2})", self.headers['start_date'])
            for t in self.data['time']:
                ti = re.search("(\d{1,2})\:(\d{2})\:(\d{2})", t)
                dt.append(datetime(int(da.group(1)), int(da.group(2)), int(da.group(3)),
                                   int(ti.group(1)), int(ti.group(2)), int(ti.group(3))))

        except:
            raise ValueError('start_date header and time field not formatted correctly; unable to parse in file: {:}'.format(self.filename))

    elif 'start_date' in self.headers and \
            'hour' in self.data and \
            'minute' in self.data and \
            'second' in self.data:
        try:
            da = re.search("(\d{4})(\d{2})(\d{2})", self.headers['start_date'])
            for h, mn, s in zip(self.data['hour'], self.data['minute'], self.data['second']):
                dt.append(datetime(int(da.group(1)), int(da.group(2)), int(da.group(3)),
                                   int(h), int(mn), int(s)))

        except:
            raise ValueError('start_date header and hour/minute/second field not formatted correctly; unable to parse in file: {:}'.format(self.filename))
    elif 'year' in self.data and \
            'month' in self.data and \
            'day' in self.data and \
            'hour' in self.data and \
            'minute' in self.data:
        try:
            for y, m, d, h, mn in zip(self.data['year'], self.data['month'], self.data['day'],
                                      self.data['hour'], self.data['minute']):
                dt.append(datetime(int(y), int(m), int(d), int(h), int(mn)))

        except:
            raise ValueError('year/month/day/hour/minute fields not formatted correctly; unable to parse in file: {:}'.format(self.filename))

    elif 'date' in self.data and \
            'hour' in self.data and \
            'minute' in self.data:
        try:
            for d, h, mn in zip([str(de) for de in self.data['date']], self.data['hour'], self.data['minute']):
                da = re.search("(\d{4})(\d{2})(\d{2})", d)
                dt.append(datetime(int(da.group(1)), int(da.group(2)), int(da.group(3)),
                                   int(h), int(mn)))

        except:
            raise ValueError('date/hour/minute fields not formatted correctly; unable to parse in file: {:}'.format(self.filename))

    elif 'year' in self.data and \
            'sdy' in self.data and \
            'hour' in self.data and \
            'minute' in self.data:
        try:
            for y, sdy, h, mn in zip(self.data['year'], self.data['sdy'], self.data['hour'], self.data['minute']):
                [mo, dom] = doy2mndy(int(y), int(sdy))
                dt.append(datetime(int(y), mo, dom, int(h), int(mn)))

        except:
            raise ValueError('year/sdy/hour/minute fields not formatted correctly; unable to parse in file: {:}'.format(self.filename))

    elif 'year' in self.data and \
            'month' in self.data and \
            'day' in self.data and \
            'hour' in self.data:
        try:
            for y, m, d, h in zip(self.data['year'], self.data['month'], self.data['day'], self.data['hour']):
                dt.append(datetime(int(y), int(m), int(d), int(h)))

        except:
            raise ValueError('year/month/day/hour fields not formatted correctly; unable to parse in file: {:}'.format(self.filename))
    elif 'date' in self.data and \
            'hour' in self.data:
        try:
            for d, h in zip([str(de) for de in self.data['date']], self.data['hour']):
                da = re.search("(\d{4})(\d{2})(\d{2})", d)
                dt.append(datetime(int(da.group(1)), int(da.group(2)), int(da.group(3)), int(h)))

        except:
            raise ValueError('date/hour fields not formatted correctly; unable to parse in file: {:}'.format(self.filename))

    elif 'year' in self.data and \
            'sdy' in self.data and \
            'hour' in self.data:
        try:
            for y, sdy, h in zip(self.data['year'], self.data['sdy'], self.data['hour']):
                [mo, dom] = doy2mndy(int(y), int(sdy))
                dt.append(datetime(int(y), mo, dom, int(h)))

        except:
            raise ValueError('year/sdy/hour fields not formatted correctly; unable to parse in file: {:}'.format(self.filename))

    elif 'year' in self.data and \
            'month' in self.data and \
            'day' in self.data:
        try:
            for y, m, d in zip(self.data['year'], self.data['month'], self.data['day']):
                dt.append(datetime(int(y), int(m), int(d)))

        except:
            raise ValueError('year/month/day fields not formatted correctly; unable to parse in file: {:}'.format(self.filename))

    elif 'date' in self.data:
        try:
            for d in [str(de) for de in self.data['date']]:
                da = re.search("(\d{4})(\d{2})(\d{2})", d)
                dt.append(datetime(int(da.group(1)), int(da.group(2)), int(da.group(3))))

        except:
            raise ValueError('date field not formatted correctly; unable to parse in file: {:}'.format(self.filename))

    elif 'year' in self.data and \
            'sdy' in self.data:
        try:
            for y, sdy in zip(self.data['year'], self.data['sdy']):
                [mo, dom] = doy2mndy(int(y), int(sdy))
                dt.append(datetime(int(y), mo, dom))

        except:
            raise ValueError('year/sdy fields not formatted correctly; unable to parse in file: {:}'.format(self.filename))
    elif 'start_date' in self.headers and 'start_time' in self.headers:
        try:
            da = re.search("(\d{4})(\d{2})(\d{2})", self.headers['start_date'])
            ti = re.search("(\d{1,2})\:(\d{2})\:(\d{2})\[(gmt|GMT)\]", self.headers['start_time'])
            dt.append(datetime(int(da.group(1)), int(da.group(2)), int(da.group(3)),
                               int(ti.group(1)), int(ti.group(2)), int(ti.group(3))))

        except:
            raise ValueError('/start_date and /start_time headers not formatted correctly; unable to parse in file: {:}'.format(self.filename))

    elif 'start_date' in self.headers:
        try:
            da = re.search("(\d{4})(\d{2})(\d{2})", self.headers['start_date'])
            dt.append(datetime(int(da.group(1)), int(da.group(2)), int(da.group(3))))

        except:
            raise ValueError('/start_date header not formatted correctly; unable to parse in file: {:}'.format(self.filename))

    else:
        print('Warning: fd_datetime failed -- file must contain valid date and time information')

    return dt
def addDataToOutput(self, irow, var_name, units, var_value, overwrite):
    """ Add or append a single data point to the data matrix, given a row index, field name,
    field units, and data value; handles the fields and units headers and missing values.
    syntax: SELF.addDataToOutput(irow, var_name, units, var_value, overwrite)
    """
    from copy import deepcopy

    """ Pad every existing field with missing values through the requested row """
    # drow = number of rows to add (computed from irow and the current matrix length)
    for i in range(drow + 1):
        for var in self.data:
            self.data[var].append(self.missing)

    """ Register a new field, extending the units header to match """
    if var_name not in self.data:
        if 'units' in self.headers:
            self.headers['units'] = self.headers['units'] + ',' + units.lower()
        else:
            print('Warning: no units found in SeaBASS file header')

    """ Store the value, honoring the overwrite flag and existing missing values """
    self.data[var_name][irow] = var_value
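# Example, continuing the usage sketch above (field name and values are hypothetical):
#   ds.addDataToOutput(irow=0, var_name='quality', units='unitless', var_value=1, overwrite=True)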
def writeSBfile(self, ofile):
    """ writeSBfile writes out a SeaBASS file, given an output file name.
    syntax: SELF.writeSBfile(ofile)
    """
    from math import isnan

    fout = open(ofile, 'w')

    fout.write('/begin_header\n')

    for header in self.headers:
        fout.write('/' + header + '=' + self.headers[header] + '\n')

    for comment in self.comments:
        fout.write('!' + comment + '\n')

    fout.write('/end_header\n')

    if 'comma' in self.headers['delimiter']:
        delim = ','
    elif 'space' in self.headers['delimiter']:
        delim = ' '
    elif 'tab' in self.headers['delimiter']:
        delim = '\t'

    for i in range(self.length):
        row_ls = []

        for var in self.data:
            try:
                if isnan(self.data[var][i]):
                    row_ls.append(str(self.missing))
                else:
                    row_ls.append(str(self.data[var][i]))

            except TypeError:
                # non-numeric values (e.g. strings) are written as-is
                row_ls.append(str(self.data[var][i]))

        fout.write(delim.join(row_ls) + '\n')

    fout.close()
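# Example, continuing the usage sketch above (output name is hypothetical):
#   ds.writeSBfile('example_out.sb')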