"""
A script to perform searches of the EarthData Common Metadata Repository (CMR)
for satellite granule names and download links.
written by J.Scott on 2016/12/12 (joel.scott@nasa.gov)

Modified by Inia Soto on 2020/08/17 (inia.m.sotoramos@nasa.gov)
Replaced wget function with python requests
"""
import os
import re
import requests


def main():

    import argparse
    from datetime import timedelta
    from math import isnan
    from collections import OrderedDict
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter, description='''\
      This program performs searches of the EarthData Search (https://search.earthdata.nasa.gov/search) Common Metadata
      Repository (CMR) for satellite granule names given an OB.DAAC satellite/instrument and lat/lon/time point or range.

      Outputs:
      1) a list of OB.DAAC L2 satellite file granule names that contain the input criteria, per the CMR's records.
      2) a list of public download links to fetch the matching satellite file granules, per the CMR's records.

      Inputs:
      The argument-list is a set of --keyword value pairs.

      Example usage calls:
         fd_matchup.py --sat=modist --slat=23.0 --slon=170.0 --stime=2015-11-16T09:00:00Z --max_time_diff=8
         fd_matchup.py --sat=modist --stime=2015-11-15T09:00:00Z --etime=2015-11-17T09:00:00Z --slat=23.0 --elat=25.0 --slon=170.0 --elon=175.0
         fd_matchup.py --sat=modist --max_time_diff=4 --seabass_file=[your SB file name].sb
         fd_matchup.py --sat=modist --slat=23.0 --slon=170.0 --stime=2015-11-16T09:00:00Z --max_time_diff=8 --get_data=[Your path]

      Caveats:
      * This script is designed to work with files that have been properly
        formatted according to SeaBASS guidelines (i.e. files that passed FCHECK).
        Some error checking is performed, but improperly formatted input files
        could cause this script to error or behave unexpectedly. Files
        downloaded from the SeaBASS database should already be properly formatted;
        however, please email seabass@seabass.gsfc.nasa.gov and/or the contact listed
        in the metadata header if you identify problems with specific files.

      * It is always HIGHLY recommended that you check for and read any metadata
        header comments and/or documentation accompanying data files. Information
        from those sources could impact your analysis.

      * Compatibility: This script was developed for Python 3.5.

      * Requires a valid .netrc file in the user home ($HOME), e.g.:
        machine urs.earthdata.nasa.gov login USERNAME password PASSWD
      ''')
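    # A quick way to sanity-check that .netrc requirement (an illustrative
    # sketch, not part of the original flow) is the standard-library netrc module:
    #   from netrc import netrc
    #   assert netrc().authenticators('urs.earthdata.nasa.gov') is not None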
    parser.add_argument('--sat', nargs=1, required=True, type=str,
                        choices=['modisa', 'modist', 'viirsn', 'viirsj1', 'goci', 'meris', 'czcs', 'octs', 'seawifs'],
                        help='''\
      String specifier for satellite platform/instrument

      modisa  = MODIS on AQUA
      modist  = MODIS on TERRA
      viirsn  = VIIRS on Suomi-NPP
      viirsj1 = VIIRS on JPSS-1/NOAA-20
      meris   = MERIS on ENVISAT
      goci    = GOCI on COMS
      czcs    = CZCS on Nimbus-7
      seawifs = SeaWiFS on OrbView-2
      octs    = OCTS on ADEOS-I
      ''')
    parser.add_argument('--data_type', nargs=1, type=str, default=(['*']),
                        choices=['oc', 'iop', 'sst'],
                        help='''\
      OPTIONAL: String specifier for satellite data type
      Default behavior returns all product suites

      oc  = Returns OC (ocean color) product suite
      iop = Returns IOP (inherent optical properties) product suite
      sst = Returns SST product suite (including SST4 where applicable)
      ''')
    parser.add_argument('--slat', nargs=1, type=float, help=('''\
      Starting latitude, south-most boundary
      If used with --seabass_file, will override lats in the file
      Valid values: (-90,90N)
      '''))
    parser.add_argument('--elat', nargs=1, type=float, help=('''\
      Ending latitude, north-most boundary
      If used with --seabass_file and --slat, will override lats in the file
      Valid values: (-90,90N)
      '''))
    parser.add_argument('--slon', nargs=1, type=float, help=('''\
      Starting longitude, west-most boundary
      If used with --seabass_file, will override lons in the file
      Valid values: (-180,180E)
      '''))
    parser.add_argument('--elon', nargs=1, type=float, help=('''\
      Ending longitude, east-most boundary
      If used with --seabass_file and --slon, will override lons in the file
      Valid values: (-180,180E)
      '''))
    parser.add_argument('--stime', nargs=1, type=str, help='''\
      Time (point) of interest in UTC
      Default behavior: returns matches within +/- MAX_TIME_DIFF (default +/-3 hours) about this given time
      If used with ETIME, this creates a search time window, between STIME and ETIME.
      Valid format: string of the form: yyyy-mm-ddThh:mm:ssZ
      OPTIONALLY: Use with --max_time_diff or --etime
      ''')
    parser.add_argument('--max_time_diff', nargs=1, type=float, default=([3]), help=('''\
      Maximum time difference between satellite and in situ point
      OPTIONAL: default value +/-3 hours
      Valid values: decimal number of hours (0-36)
      Use with --seabass_file OR --stime
      '''))
    parser.add_argument('--etime', nargs=1, type=str, help='''\
      Maximum time (range) of interest in UTC
      Valid format: string of the form: yyyy-mm-ddThh:mm:ssZ
      ''')
    parser.add_argument('--seabass_file', nargs='+', type=argparse.FileType('r'), help='''\
      Valid SeaBASS file name or list of file names
      File must contain latitude, longitude, and date-time information as fields.
      ''')
    parser.add_argument('--get_data', nargs=1, type=str, help='''\
      Flag to download all identified satellite granules.
      Requires the use of an HTTP request.
      Set to the desired output directory.
      ''')
    parser.add_argument('--verbose', default=False, action='store_true', help=('''\
      OPTIONAL: Displays HTTP requests for each Earthdata CMR query.
      '''))
    args = parser.parse_args()

    if not args.sat:
        parser.error("you must specify a satellite string to conduct a search")

    dict_args = vars(args)
    sat = dict_args['sat'][0]
    # dictionary of lists of CMR platform, instrument, and collection short-name prefixes
    dict_plat = {}
    dict_plat['modisa']  = ['MODIS', 'AQUA', 'MODISA_L2_']
    dict_plat['modist']  = ['MODIS', 'TERRA', 'MODIST_L2_']
    dict_plat['viirsn']  = ['VIIRS', 'Suomi-NPP', 'VIIRSN_L2_']
    dict_plat['viirsj1'] = ['VIIRS', 'NOAA-20', 'VIIRSJ1_L2_']
    dict_plat['meris']   = ['MERIS', 'ENVISAT', 'MERIS_L2_']
    dict_plat['goci']    = ['GOCI', 'COMS', 'GOCI_L2_']
    dict_plat['czcs']    = ['CZCS', 'Nimbus-7', 'CZCS_L2_']
    dict_plat['seawifs'] = ['SeaWiFS', 'OrbView-2', 'SeaWiFS_L2_']
    dict_plat['octs']    = ['OCTS', 'ADEOS-I', 'OCTS_L2_']
    if sat not in dict_plat:
        parser.error('you provided an invalid satellite string specifier. Use -h flag to see a list of valid options for --sat')
    if args.get_data:
        if not dict_args['get_data'][0] or not os.path.exists(dict_args['get_data'][0]):
            parser.error('invalid --get_data target download directory provided.')
    if dict_args['max_time_diff'][0] < 0 or dict_args['max_time_diff'][0] > 36:
        parser.error('invalid --max_time_diff value provided. Please specify a value between 0 and 36 hours. Received --max_time_diff = ' + str(dict_args['max_time_diff'][0]))
    # split the (possibly fractional) time window into integer hour and minute offsets
    twin_Hmin = -1 * int(dict_args['max_time_diff'][0])
    twin_Mmin = -60 * (dict_args['max_time_diff'][0] - int(dict_args['max_time_diff'][0]))
    twin_Hmax = 1 * int(dict_args['max_time_diff'][0])
    twin_Mmax = 60 * (dict_args['max_time_diff'][0] - int(dict_args['max_time_diff'][0]))
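    # Worked example of the window arithmetic above (values assumed for
    # illustration): --max_time_diff=4.5 gives twin_Hmin=-4, twin_Mmin=-30.0,
    # twin_Hmax=4, twin_Mmax=30.0, i.e. a +/-4h30m window about each time.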
    # SeaBASS file mode: search on the lat/lon/time inputs from each file
    if args.seabass_file:
        for filein_sb in dict_args['seabass_file']:
            ds = check_SBfile(parser, filein_sb.name)

            hits = 0
            granlinks = OrderedDict()

            # search the CMR with a bounding box when a full lat/lon box is given
            if args.slat and args.slon and args.elat and args.elon:
                for dt in ds.datetime:
                    tim_min = dt + timedelta(hours=twin_Hmin, minutes=twin_Mmin)
                    tim_max = dt + timedelta(hours=twin_Hmax, minutes=twin_Mmax)

                    url = 'https://cmr.earthdata.nasa.gov/search/granules.json?page_size=2000' + \
                          '&provider=OB_DAAC' + \
                          '&bounding_box=' + str(dict_args['slon'][0]) + ',' + str(dict_args['slat'][0]) + ',' + \
                          str(dict_args['elon'][0]) + ',' + str(dict_args['elat'][0]) + \
                          '&instrument=' + dict_plat[sat][0] + \
                          '&platform=' + dict_plat[sat][1] + \
                          '&short_name=' + dict_plat[sat][2] + dict_args['data_type'][0] + \
                          '&options[short_name][pattern]=true' + \
                          '&temporal=' + tim_min.strftime('%Y-%m-%dT%H:%M:%SZ') + ',' + tim_max.strftime('%Y-%m-%dT%H:%M:%SZ') + \
                          '&sort_key=short_name'
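                    # A fully assembled query looks like this (illustrative values,
                    # not taken from the source):
                    # https://cmr.earthdata.nasa.gov/search/granules.json?page_size=2000&provider=OB_DAAC
                    #   &bounding_box=170.0,23.0,175.0,25.0&instrument=MODIS&platform=TERRA
                    #   &short_name=MODIST_L2_*&options[short_name][pattern]=true
                    #   &temporal=2015-11-16T01:00:00Z,2015-11-16T17:00:00Z&sort_key=short_name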
                    if dict_args['verbose']:
                        print(url)

                    content = send_CMRreq(url)
                    hits, granlinks = process_CMRreq(content, hits, granlinks)
            # search the CMR with a point when only a single lat/lon pair is given
            elif args.slat and args.slon and not args.elat and not args.elon:
                for dt in ds.datetime:
                    tim_min = dt + timedelta(hours=twin_Hmin, minutes=twin_Mmin)
                    tim_max = dt + timedelta(hours=twin_Hmax, minutes=twin_Mmax)

                    url = 'https://cmr.earthdata.nasa.gov/search/granules.json?page_size=2000' + \
                          '&provider=OB_DAAC' + \
                          '&point=' + str(dict_args['slon'][0]) + ',' + str(dict_args['slat'][0]) + \
                          '&instrument=' + dict_plat[sat][0] + \
                          '&platform=' + dict_plat[sat][1] + \
                          '&short_name=' + dict_plat[sat][2] + dict_args['data_type'][0] + \
                          '&options[short_name][pattern]=true' + \
                          '&temporal=' + tim_min.strftime('%Y-%m-%dT%H:%M:%SZ') + ',' + tim_max.strftime('%Y-%m-%dT%H:%M:%SZ') + \
                          '&sort_key=short_name'
                    if dict_args['verbose']:
                        print(url)

                    content = send_CMRreq(url)
                    hits, granlinks = process_CMRreq(content, hits, granlinks)
            # otherwise, search point-by-point using the lat/lon pairs in the file
            else:
                check_SBfile_latlon(parser, ds)

                for lat, lon, dt in zip(ds.lat, ds.lon, ds.datetime):
                    if isnan(lat) or isnan(lon):
                        continue

                    tim_min = dt + timedelta(hours=twin_Hmin, minutes=twin_Mmin)
                    tim_max = dt + timedelta(hours=twin_Hmax, minutes=twin_Mmax)

                    url = 'https://cmr.earthdata.nasa.gov/search/granules.json?page_size=2000' + \
                          '&provider=OB_DAAC' + \
                          '&point=' + str(lon) + ',' + str(lat) + \
                          '&instrument=' + dict_plat[sat][0] + \
                          '&platform=' + dict_plat[sat][1] + \
                          '&short_name=' + dict_plat[sat][2] + dict_args['data_type'][0] + \
                          '&options[short_name][pattern]=true' + \
                          '&temporal=' + tim_min.strftime('%Y-%m-%dT%H:%M:%SZ') + ',' + tim_max.strftime('%Y-%m-%dT%H:%M:%SZ') + \
                          '&sort_key=short_name'
                    if dict_args['verbose']:
                        print(url)

                    content = send_CMRreq(url)
                    hits, granlinks = process_CMRreq(content, hits, granlinks)

            print_CMRreq(hits, granlinks, dict_plat[sat], args, dict_args, filein_sb.name)
    # lat/lon/time point or range mode (no SeaBASS file given)
    else:
        granlinks = OrderedDict()

        # establish the search time window
        if args.stime and not args.etime:
            dt = check_time(parser, dict_args['stime'][0])

            tim_min = dt + timedelta(hours=twin_Hmin, minutes=twin_Mmin)
            tim_max = dt + timedelta(hours=twin_Hmax, minutes=twin_Mmax)

        elif args.stime and args.etime:
            tim_min = check_time(parser, dict_args['stime'][0])
            tim_max = check_time(parser, dict_args['etime'][0])

            check_time_relative(parser, tim_min, tim_max)

        else:
            parser.error('invalid time: All time inputs MUST be in UTC. Must receive --stime=YYYY-MM-DDTHH:MM:SSZ')
        # search the CMR with a point
        if args.slat and args.slon and not args.elat and not args.elon:
            check_lat(parser, dict_args['slat'][0])
            check_lon(parser, dict_args['slon'][0])

            url = 'https://cmr.earthdata.nasa.gov/search/granules.json?page_size=2000' + \
                  '&provider=OB_DAAC' + \
                  '&point=' + str(dict_args['slon'][0]) + ',' + str(dict_args['slat'][0]) + \
                  '&instrument=' + dict_plat[sat][0] + \
                  '&platform=' + dict_plat[sat][1] + \
                  '&short_name=' + dict_plat[sat][2] + dict_args['data_type'][0] + \
                  '&options[short_name][pattern]=true' + \
                  '&temporal=' + tim_min.strftime('%Y-%m-%dT%H:%M:%SZ') + ',' + tim_max.strftime('%Y-%m-%dT%H:%M:%SZ') + \
                  '&sort_key=short_name'
            if dict_args['verbose']:
                print(url)

            content = send_CMRreq(url)
            processANDprint_CMRreq(content, granlinks, dict_plat[sat], args, dict_args, tim_min, tim_max)
        # search the CMR with a bounding box
        elif args.slat and args.elat and args.slon and args.elon:
            check_lat(parser, dict_args['slat'][0])
            check_lat(parser, dict_args['elat'][0])
            check_lon(parser, dict_args['slon'][0])
            check_lon(parser, dict_args['elon'][0])

            check_lat_relative(parser, dict_args['slat'][0], dict_args['elat'][0])
            check_lon_relative(parser, dict_args['slon'][0], dict_args['elon'][0])

            url = 'https://cmr.earthdata.nasa.gov/search/granules.json?page_size=2000' + \
                  '&provider=OB_DAAC' + \
                  '&bounding_box=' + str(dict_args['slon'][0]) + ',' + str(dict_args['slat'][0]) + ',' + \
                  str(dict_args['elon'][0]) + ',' + str(dict_args['elat'][0]) + \
                  '&instrument=' + dict_plat[sat][0] + \
                  '&platform=' + dict_plat[sat][1] + \
                  '&short_name=' + dict_plat[sat][2] + dict_args['data_type'][0] + \
                  '&options[short_name][pattern]=true' + \
                  '&temporal=' + tim_min.strftime('%Y-%m-%dT%H:%M:%SZ') + ',' + tim_max.strftime('%Y-%m-%dT%H:%M:%SZ') + \
                  '&sort_key=short_name'
            if dict_args['verbose']:
                print(url)

            content = send_CMRreq(url)
            processANDprint_CMRreq(content, granlinks, dict_plat[sat], args, dict_args, tim_min, tim_max)

        else:
            parser.error('invalid combination of --slat and --slon OR --slat, --elat, --slon, and --elon arguments provided. All latitude inputs MUST be between -90/90N deg. All longitude inputs MUST be between -180/180E deg.')
358 """ function to verify SB file exists, is valid, and has correct fields; returns data structure """
360 from seabass.SB_support
import readSB
362 if os.path.isfile(file_sb):
363 ds = readSB(filename=file_sb,
365 mask_above_detection_limit=
True,
366 mask_below_detection_limit=
True,
369 parser.error(
'ERROR: invalid --seabass_file specified. Does: ' + file_sb +
' exist?')
371 ds.datetime = ds.fd_datetime()
373 parser.error(
'missing fields in SeaBASS file. File must contain date/time, date/hour/minute/second, year/month/day/time, OR year/month/day/hour/minute/second')
379 """ function to verify lat/lon exist in SB file's data structure """
382 from numpy
import mean
387 if 'lat' in ds.data
and 'lon' in ds.data:
389 for lat,lon
in zip(ds.data[
'lat'],ds.data[
'lon']):
392 ds.lat.append(
float(lat))
393 ds.lon.append(
float(lon))
395 elif 'north_latitude' in ds.headers
and \
396 'south_latitude' in ds.headers
and \
397 'west_longitude' in ds.headers
and \
398 'east_longitude' in ds.headers:
        lat_n = re.search(r"([+|-]?\d*\.?\d*)\[(deg|DEG)\]", ds.headers['north_latitude'])
        lat_s = re.search(r"([+|-]?\d*\.?\d*)\[(deg|DEG)\]", ds.headers['south_latitude'])
        lon_w = re.search(r"([+|-]?\d*\.?\d*)\[(deg|DEG)\]", ds.headers['west_longitude'])
        lon_e = re.search(r"([+|-]?\d*\.?\d*)\[(deg|DEG)\]", ds.headers['east_longitude'])
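        # e.g. a header line such as '/north_latitude=23.000[DEG]' (an assumed
        # example of the SeaBASS header format) matches with lat_n.group(1) == '23.000'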
        try:
            ds.lat.append(mean([float(lat_n.group(1)), float(lat_s.group(1))]))
            ds.lon.append(mean([float(lon_w.group(1)), float(lon_e.group(1))]))
        except AttributeError:
            parser.error('/north_latitude, /south_latitude, /west_longitude, or /east_longitude headers not formatted correctly; unable to parse in file: {:}'.format(ds.filename))

    else:
        parser.error('missing headers/fields in SeaBASS file. File must contain lat,lon information')

    return ds
424 """ function to verify lat range """
426 parser.error(
'invalid latitude: all LAT values MUST be between -90/90N deg. Received: ' +
str(lat))
431 """ function to verify lon range """
433 parser.error(
'invalid longitude: all LON values MUST be between -180/180E deg. Received: ' +
str(lon))
437 """ function to verify two lats relative to each other """
439 parser.error(
'invalid latitude: --slat MUST be less than --elat. Received --slat = ' +
str(slat) +
' and --elat = ' +
str(elat))
444 """ function to verify two lons relative to each other """
446 parser.error(
'invalid longitude: --slon MUST be less than --elon. Received --slon = ' +
str(slon) +
' and --elon = ' +
str(elon))
451 """ function to verify time """
453 from datetime
import datetime
456 tims = re.search(
"(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})Z", tim);
457 dt = datetime(year=
int(tims.group(1)), \
458 month=
int(tims.group(2)), \
459 day=
int(tims.group(3)), \
460 hour=
int(tims.group(4)), \
461 minute=
int(tims.group(5)), \
462 second=
int(tims.group(6)))
464 parser.error(
'invalid time: All time inputs MUST be in UTC in the form: YYYY-MM-DDTHH:MM:SSZ Received: ' + tim)
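
# For example (an illustrative call): check_time(parser, '2015-11-16T09:00:00Z')
# returns datetime(2015, 11, 16, 9, 0, 0)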
469 """ function to verify two times relative to each other """
470 if tim_min > tim_max:
471 parser.error(
'invalid time: --stime MUST be less than --etime. Received --stime = ' + \
472 tim_min.strftime(
'%Y-%m-%dT%H:%M:%SZ') +
' and --etime = ' + \
473 tim_max.strftime(
'%Y-%m-%dT%H:%M:%SZ'))
478 """ function to submit a given URL request to the CMR; return JSON output """
481 req = requests.get(url)
500 """ function to process the return from a single CMR JSON return """
503 hits = hits + len(content[
'feed'][
'entry'])
504 for entry
in content[
'feed'][
'entry']:
505 granid = entry[
'producer_granule_id']
506 granlinks[granid] = entry[
'links'][0][
'href']
508 print(
'WARNING: No matching granules found for a row. Continuing to search for granules from the rest of the input file...')
510 return hits, granlinks
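
# The accesses above assume the usual CMR granules.json response shape,
# roughly (a sketch; most fields elided):
# {"feed": {"entry": [{"producer_granule_id": "...",
#                      "links": [{"href": "..."}, ...]}, ...]}}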

def download_file(url, out_dir):
    """
    download_file downloads a file
    given URL and out_dir strings
    syntax fname_local = download_file(url, out_dir)
    """

    from pathlib import Path

    out_path = Path(out_dir).expanduser()
    path_local = out_path / url.split('/')[-1]
    print('Downloading', url.split('/')[-1], 'to', out_path)

    if not out_path.exists():
        os.makedirs(str(out_dir))

    if path_local.exists():
        print(f'{path_local.name} found - deleting and attempting to re-download...')
        path_local.unlink()

    print(f'downloading {path_local.name}')

    try:
        req2 = requests.get(url)
        with open(path_local.as_posix(), 'wb') as f:
            f.write(req2.content)
    except requests.exceptions.RequestException as e:
        print('Error in download_file:', e)
    except Exception as e:
        print('Unknown error:', e)
    if not path_local.exists():
        print('Error in download_file: local copy of file not found; download unsuccessful')
    else:
        print('Successfully downloaded', url.split('/')[-1], 'to', out_path)

    return str(path_local)
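
# Usage sketch (hypothetical URL and output directory):
#   fname_local = download_file('https://oceandata.sci.gsfc.nasa.gov/ob/getfile/T2015320.L2_LAC_OC.nc', '~/Downloads')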
570 """" function to print the CMR results from a SB file """
574 for granid
in granlinks:
575 unique_hits = unique_hits + 1
577 print(
'Matching ' + plat_ls[1] +
'/' + plat_ls[0] +
' granule (' + granid +
') found for: ' + sbfile)
579 if args.get_data
and dict_args[
'get_data'][0]:
580 fname_loc =
download_file(granlinks[granid], dict_args[
'get_data'][0])
582 print(
'Download link: ' + granlinks[granid])
586 print(
'Number of granules found: ' +
str(unique_hits))
590 print(
'WARNING: No granules found for ' + plat_ls[1] +
'/' + plat_ls[0] +
' and any lat/lon/time inputs.')
596 """ function to process AND print the return from a single CMR JSON return """
599 hits = len(content[
'feed'][
'entry'])
600 for entry
in content[
'feed'][
'entry']:
601 granid = entry[
'producer_granule_id']
602 granlinks[granid] = entry[
'links'][0][
'href']
604 print(
'Matching ' + plat_ls[1] +
'/' + plat_ls[0] +
' granule (' + granid +
') found.')
606 if args.get_data
and dict_args[
'get_data'][0]:
607 fname_loc =
download_file(granlinks[granid], dict_args[
'get_data'][0])
609 print(
'Download link: ' + granlinks[granid])
613 print(
'Number of granules found: ' +
str(hits))
617 print(
'WARNING: No matching granules found for ' + plat_ls[1] +
'/' + plat_ls[0] + \
618 ' containing the requested lat/lon area during the ' + \
619 str(dict_args[
'max_time_diff'][0]) +
'-hr window of ' + \
620 tim_min.strftime(
'%Y-%m-%dT%H:%M:%SZ') +
' to ' + tim_max.strftime(
'%Y-%m-%dT%H:%M:%SZ'))

if __name__ == "__main__":
    main()