from contextlib import closing
from datetime import datetime, timedelta, date

from requests.adapters import HTTPAdapter

DEFAULT_BASE_URL = "https://oceandata.sci.gsfc.nasa.gov/manifest/tags"
MANIFEST_BASENAME = "manifest.json"

DEFAULT_CHUNK_SIZE = 131072
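# 131072 bytes = 128 KiB per read when streaming downloads; MANIFEST_BASENAME
# is the file each bundle directory is expected to contain.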
logging.basicConfig(level=logging.DEBUG)

obpgSession = requests.Session()
obpgSession.mount('https://', HTTPAdapter(max_retries=ntries))

print("OBPG session started")

print("reusing existing OBPG session")
ctype = req.headers.get('Content-Type')
if ctype and ctype.startswith('text/html'):
    if "<title>Earthdata Login</title>" in req.text:
        return True
def httpdl(server, request, localpath='.', outputfilename=None, ntries=5,
           uncompress=False, timeout=30., verbose=0, force_download=False,
           chunk_size=DEFAULT_CHUNK_SIZE):
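    """Download https://<server><request> into localpath.

    Streams the response in chunk_size pieces, sends an If-Modified-Since
    header when a matching local file already exists, optionally uncompresses
    .Z/.gz/.bz2 results, and reports a non-zero status on failure.
    """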
    urlStr = 'https://' + server + request
    if not force_download:
        if outputfilename:
            ofile = os.path.join(localpath, outputfilename)
        else:
            ofile = os.path.join(localpath, os.path.basename(request.rstrip()))

        headers = {"If-Modified-Since": modified_since.strftime("%a, %d %b %Y %H:%M:%S GMT")}
    with closing(obpgSession.get(urlStr, stream=True, timeout=timeout, headers=headers)) as req:

        if req.status_code != 200:
            status = req.status_code
        if not os.path.exists(localpath):
            os.makedirs(localpath, mode=0o2775)

        if not outputfilename:
            cd = req.headers.get('Content-Disposition')
            if cd:
                outputfilename = re.findall("filename=(.+)", cd)[0]
            else:
                outputfilename = urlStr.split('/')[-1]

        ofile = os.path.join(localpath, outputfilename)
        if 'last-modified' in req.headers:
            remote_lmt = req.headers['last-modified']
            remote_ftime = datetime.strptime(remote_lmt, "%a, %d %b %Y %H:%M:%S GMT").replace(tzinfo=None)
            if modified_since and not force_download:
                if (remote_ftime - modified_since).total_seconds() < 0:
                    print("Skipping download of %s" % outputfilename)
        with open(ofile, 'wb') as fd:
            for chunk in req.iter_content(chunk_size=chunk_size):
                fd.write(chunk)

        if uncompress and re.search(r"\.(Z|gz|bz2)$", ofile):

            status = compressStatus
compProg = {"gz": "gunzip -f ", "Z": "gunzip -f ", "bz2": "bunzip2 -f "}
exten = os.path.basename(compressed_file).split('.')[-1]
unzip = compProg[exten]
p = subprocess.Popen(unzip + compressed_file, shell=True)
status = os.waitpid(p.pid, 0)[1]
if status:
    print("Warning! Unable to decompress %s" % compressed_file)
if not os.path.isfile(localFile):
    localFile = re.sub(r"\.(Z|gz|bz2)$", '', localFile)

if os.path.isfile(localFile):
    ftime = datetime.fromtimestamp(os.path.getmtime(localFile))
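# Fall back to the uncompressed name when the .Z/.gz/.bz2 file is gone, then
# report the local copy's modification time (used for the If-Modified-Since
# check above).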
parser = argparse.ArgumentParser()
parser.set_defaults(func=download)
subparsers = parser.add_subparsers()

_add_subparser_reprint(subparsers)
_add_subparser_update_file(subparsers)
_add_subparser_add_tag(subparsers)
_add_subparser_get_value(subparsers)
_add_subparser_get_first_tag(subparsers)
_add_subparser_list(subparsers)
_add_subparser_clean(subparsers)
_add_subparser_download(subparsers)
_add_subparser_generate(subparsers)
_add_subparser_list_tags(subparsers)

options, args = parser.parse_known_args()
return options.func(options, args)
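# Example invocations of the command-line interface assembled above, assuming
# the script is run as manifest.py (tag, bundle name, and paths below are
# illustrative, not taken from this file):
#
#   manifest.py download -t V2022.3 -n common -d /opt/share/common
#   manifest.py list manifest.json -t V2022.3
#   manifest.py generate -t V2022.3 -n common /path/to/bundle > manifest.json
#   manifest.py add-tag -m manifest.json V2022.4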
def _add_subparser_reprint(subparsers):
    parser_reprint = subparsers.add_parser('reprint')
    parser_reprint.add_argument("manifest", help="manifest to reprint")
    parser_reprint.set_defaults(func=reprint)
    if os.path.isfile(MANIFEST_BASENAME):
        parser_reprint.set_defaults(manifest=MANIFEST_BASENAME)
def _add_subparser_update_file(subparsers):
    parser_update_file = subparsers.add_parser('update-file')
    parser_update_file.add_argument("manifest", help="manifest to update")
    parser_update_file.add_argument("path", help="file to update")
    parser_update_file.set_defaults(func=update_file)
    if os.path.isfile(MANIFEST_BASENAME):
        parser_update_file.set_defaults(manifest=MANIFEST_BASENAME)
def _add_subparser_add_tag(subparsers):
    parser_add_tag = subparsers.add_parser('add-tag')
    parser_add_tag.add_argument("-m", "--manifest", help="manifest to update")
    parser_add_tag.add_argument("tag", help="tag to add to tags attribute")
    parser_add_tag.set_defaults(func=add_tag)
    if os.path.isfile(MANIFEST_BASENAME):
        parser_add_tag.set_defaults(manifest=MANIFEST_BASENAME)
def _add_subparser_get_value(subparsers):
    parser_get_value = subparsers.add_parser('get-value')
    parser_get_value.add_argument("-m", "--manifest", help="manifest from which to retrieve the value")
    parser_get_value.add_argument("xpath", help="key to print, colon separated for nested values")
    parser_get_value.set_defaults(func=get_value)
    if os.path.isfile(MANIFEST_BASENAME):
        parser_get_value.set_defaults(manifest=MANIFEST_BASENAME)
def _add_subparser_get_first_tag(subparsers):
    parser_get_first_tag = subparsers.add_parser('get-first-tag')
    parser_get_first_tag.add_argument("-m", "--manifest", help="manifest from which to retrieve the first tag")
    parser_get_first_tag.set_defaults(func=get_first_tag)
    if os.path.isfile(MANIFEST_BASENAME):
        parser_get_first_tag.set_defaults(manifest=MANIFEST_BASENAME)
def _add_subparser_list(subparsers):
    parser_list = subparsers.add_parser('list')
    parser_list.add_argument("manifest", help="manifest to list")
    parser_list.add_argument("-i", "--info", action="store_const", const=1, help="include extra info")
    parser_list.add_argument("-t", "--tag", help="tag to list files for")
    parser_list.set_defaults(func=list)
    if os.path.isfile(MANIFEST_BASENAME):
        parser_list.set_defaults(manifest=MANIFEST_BASENAME)
def _add_subparser_clean(subparsers):
    parser_clean = subparsers.add_parser('clean')
    parser_clean.add_argument("-d", "--dry-run", action="store_const", const=1, help="don't actually delete files")
    parser_clean.add_argument("directory", default=".", nargs='?', help="directory to clean (must contain %s)" % MANIFEST_BASENAME)
    parser_clean.add_argument("-e", "--exclude", nargs="+", action='append', help="relative paths to ignore")
    parser_clean.add_argument("-i", "--include", nargs="+", action='append', help="relative paths to include (ignore *)")
    parser_clean.add_argument("-v", "--verbose", action="count", default=0, help="increase output verbosity")
    parser_clean.set_defaults(func=clean)
def _add_subparser_download(subparsers):
    parser_download = subparsers.add_parser('download')
    parser_download.add_argument("-d", "--dest-dir", help="destination directory")
    parser_download.add_argument("-t", "--tag", help="tag to download")
    parser_download.add_argument("-b", "--base-url", default=DEFAULT_BASE_URL, help="base URL")
    parser_download.add_argument("-n", "--name", help="bundle name")
    parser_download.add_argument("--chunk-size", type=int, default=DEFAULT_CHUNK_SIZE, help="download chunk size")
    parser_download.add_argument("-s", "--save-dir", help="save a copy of the manifest files to this directory")
    parser_download.add_argument("-l", "--local-dir", help="directory containing local manifest files")
    parser_download.add_argument("-w", "--wget", default=False, action="store_true", help="use wget to download")
    parser_download.add_argument("-v", "--verbose", action="count", default=0, help="increase output verbosity")
    parser_download.add_argument("files", action="append", nargs="*", default=None, type=str, help="files to download if needed")

    parser_download.set_defaults(func=download)
    parser_download.set_defaults(dest_dir=".")
def _add_subparser_generate(subparsers):
    parser_gen = subparsers.add_parser('generate')
    parser_gen.add_argument("-b", "--base-manifest", help="base manifest file")
    parser_gen.add_argument("-c", "--checksum-bytes", type=int, default=1000000, help="how many bytes to checksum per file")
    parser_gen.add_argument("-t", "--tag", required=True, help="new tag for manifest")
    parser_gen.add_argument("-f", "--force", action="store_const", const=1, help="generate manifest despite warnings")
    parser_gen.add_argument("-e", "--exclude", nargs="+", action='append', help="relative paths to ignore")
    parser_gen.add_argument("-i", "--include", nargs="+", action='append', help="relative paths to include (ignore *)")
    parser_gen.add_argument("-n", "--name", help="bundle name")
    parser_gen.add_argument("directory", help="directory to generate a manifest for")
    parser_gen.set_defaults(func=generate)
def _add_subparser_list_tags(subparsers):
    parser_list_tags = subparsers.add_parser('list_tags')
    parser_list_tags.add_argument("-b", "--base-url", default=DEFAULT_BASE_URL, help="base URL")
    parser_list_tags.add_argument("--chunk-size", type=int, default=DEFAULT_CHUNK_SIZE, help="download chunk size")
    parser_list_tags.add_argument("-w", "--wget", default=False, action="store_true", help="use wget to download")
    parser_list_tags.add_argument("-v", "--verbose", action="count", default=0, help="increase output verbosity")
    parser_list_tags.set_defaults(func=list_tags)
options = argparse.Namespace(
    base_url=DEFAULT_BASE_URL,
    chunk_size=DEFAULT_CHUNK_SIZE,
proc = subprocess.run(command, shell=True)
if proc.returncode != 0:
    print("Error: return =", proc.returncode, ": trying to run command =", command)
with open(options.manifest, 'rb') as manifest:
    manifest = json.load(manifest)
print(json.dumps(manifest, indent=4, sort_keys=True))
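# Illustrative manifest layout (field names taken from the code in this file;
# the concrete values are made up):
#
# {
#     "name": "common",
#     "tags": ["V2022.0", "V2022.1"],
#     "checksum_bytes": 1000000,
#     "files": {
#         "data/table.nc": {"checksum": "ab12...", "size": 1024,
#                           "mode": 33188, "tag": "V2022.1"},
#         "data/link.nc": {"symlink": "table.nc", "tag": "V2022.1"}
#     }
# }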
with open(options.manifest, 'rb') as manifest:
    manifest = json.load(manifest)
current_entry = manifest['files'].get(options.path)
if os.path.islink(options.path):
    linkValue = os.readlink(options.path)
    if not current_entry or current_entry.get("symlink") != linkValue:
        info = {"symlink": linkValue, "tag": options.tag}
        manifest['files'][options.path] = info
else:
    checksum = _get_checksum(manifest, options.path)
    if not current_entry or current_entry.get('checksum') != checksum:
        info = {
            "checksum": checksum,
            "size": os.stat(options.path).st_size,
            "mode": os.stat(options.path).st_mode,
            "tag": manifest['tag']
        }
        manifest['files'][options.path] = info

print(json.dumps(manifest, indent=4, sort_keys=True))
with open(options.manifest, 'rb') as manifest:
    manifest = json.load(manifest)
if options.tag not in manifest["tags"]:
    manifest["tags"].append(options.tag)
else:
    print("%s is already in the tags attribute" % (options.tag), file=sys.stderr)

print(json.dumps(manifest, indent=4, sort_keys=True))
with open(options.manifest, 'rb') as manifest:
    manifest = json.load(manifest)
for part in options.xpath.split(":"):
    if part in manifest:
        manifest = manifest[part]
    else:
        print("Path not found, invalid part: %s" % part)
with open(options.manifest, 'rb') as manifest:
    manifest = json.load(manifest)
print(manifest['tags'][0])
for root, _, files in os.walk(".", followlinks=True):
    for f in files:
        name = root[2:] + '/' + f

        for exclude in excludeList:
            if exclude[0] == "." or name.startswith(exclude[0]):

        for include in includeList:
            if name.startswith(include[0]):

        if "__pycache__" not in name:
            allFiles.append(name)
os.chdir(options.directory)

for exclude in options.exclude:
    if exclude[0] == ".":

if not os.path.isfile(MANIFEST_BASENAME):
    print("directory needs to contain a", MANIFEST_BASENAME)
with open(MANIFEST_BASENAME, 'rb') as manifest:
    manifest = json.load(manifest)
files = manifest["files"]
for f in getFileList(options.exclude, options.include):
    if f == MANIFEST_BASENAME:

    if options.verbose or options.dry_run:
        print("cleaning %s" % (f))
    if not options.dry_run:
if os.path.isdir(options.manifest):
    options.manifest = "%s/%s" % (options.manifest, MANIFEST_BASENAME)
with open(options.manifest, 'rb') as manifest:
    manifest = json.load(manifest)

for f, info in manifest["files"].items():
    if not options.tag or info["tag"] == options.tag:
        if info.get('symlink'):
            print("%s %s, -> %s" % (f, info["tag"], info["symlink"]))
        else:
            print("%s %s, %s bytes, %s" % (f, info["tag"], info["size"], info["checksum"]))
for f, info in manifest["files"].items():
    if info["tag"] == options.tag:

for f in manifest["files"]:
if not options.base_manifest and os.path.isfile("%s/%s" % (options.directory, MANIFEST_BASENAME)):
    options.base_manifest = "%s/%s" % (options.directory, MANIFEST_BASENAME)

if options.base_manifest and os.path.isfile(options.base_manifest) and os.path.getsize(options.base_manifest):
    with open(options.base_manifest, 'rb') as base_manifest:
        manifest = json.load(base_manifest)
else:
    manifest = {"checksum_bytes": options.checksum_bytes, "tags": []}
    manifest["tags"] = [options.tag]
os.chdir(options.directory)

all_files = getFileList(options.exclude, options.include)

manifest['name'] = options.name

files_entries = manifest.get("files", {})

files_to_delete = []
if "files" in manifest:
    for path, info in manifest["files"].items():
        if path not in all_files:
            files_to_delete.append(path)
    for path in files_to_delete:
        del files_entries[path]
for f in all_files:
    if os.path.basename(f) == MANIFEST_BASENAME:
        continue

    current_entry = files_entries.get(f)
    if os.path.islink(f):
        linkValue = os.readlink(f)
        if not current_entry or current_entry.get("symlink") != linkValue:
            info = {"symlink": linkValue, "tag": options.tag}
            files_entries[f] = info
    else:
        fileSize = os.path.getsize(f)
        checksum = _get_checksum(manifest, f)
        if not current_entry or current_entry.get('size') != fileSize or current_entry.get('checksum') != checksum:
            info = {
                "checksum": checksum,
                "size": fileSize,
                "mode": os.stat(f).st_mode,
                "tag": options.tag
            }
            files_entries[f] = info

manifest["files"] = files_entries
print(json.dumps(manifest, indent=4, sort_keys=True))
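# generate writes the updated manifest to stdout, so a typical run presumably
# redirects it, e.g.  manifest.py generate -t <tag> <directory> > manifest.json
# (the script name and invocation are shown for illustration only).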
manifest_filename = "%s/%s" % (options.dest_dir, MANIFEST_BASENAME)

if not os.path.isdir(options.dest_dir):
    os.makedirs(options.dest_dir)

if options.local_dir:
    if options.save_dir:
        print("Error: cannot use --local-dir and --save-dir together")

if not options.tag or not options.name:
    if not os.path.isfile(manifest_filename):
        print("must have -t and -n or %s" % (manifest_filename))

    with open(manifest_filename, 'rb') as manifest:
        manifest = json.load(manifest)

    options.tag = manifest['tags'][-1]

    options.name = manifest['name']
if not _download_file(options, MANIFEST_BASENAME):

with open(manifest_filename, 'rb') as manifest:
    manifest = json.load(manifest)

modified_files = _check_directory_against_manifest(options, options.dest_dir, manifest)

if options.files and options.files[0]:
    newList = {}
    for f in options.files[0]:
        newList[f] = modified_files[f]

    modified_files = newList

if not modified_files:
    print("No files require downloading")

_download_files(options, modified_files)
for path, info in manifest['files'].items():
    if info.get('checksum'):
        src = "%s/%s" % (options.dest_dir, path)
        dest = "%s/%s/%s/%s" % (options.save_dir, info["tag"], options.name, path)
        destDir = os.path.dirname(dest)
        if not os.path.isdir(destDir):
            os.makedirs(destDir)
        shutil.copy(src, dest)
        os.chmod(dest, info["mode"])
tempDir = tempfile.TemporaryDirectory(prefix="manifest-")

url = options.base_url + "/"

if options.wget:
    command = "cd %s; wget -q %s" % (tempDir.name, url)
else:
    parts = urllib.parse.urlparse(url)
    host = parts.netloc
    request = parts.path
    status = httpdl(host, request, localpath=tempDir.name,
                    outputfilename="index.html",
                    verbose=options.verbose,
                    chunk_size=options.chunk_size)

with open("%s/index.html" % (tempDir.name)) as f:
    for line in f:
        if "</body>" in line:
            break

        if line.startswith("<a href="):
            parts = line.split('"')
            s = parts[1].split("/")[0]

print("Error downloading list of tags : return code =", status)
if tag == options.tag:
def _get_checksum(manifest, path):
    checksum = hashlib.sha256()
    with open(path, 'rb') as current_file:
        checksum.update(current_file.read(manifest['checksum_bytes']))
    return checksum.hexdigest()
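# Note: only the first manifest['checksum_bytes'] bytes of each file are
# hashed (SHA-256), so the checksum is a fast change-detection heuristic
# rather than a full-file integrity check.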
def _check_directory_against_manifest(options, directory, manifest):
    modified_files = {}
    for path, info in manifest['files'].items():
        dest = os.path.join(directory, path)
        if os.path.islink(dest):
            if info.get('symlink') != os.readlink(dest):
                modified_files[path] = info
        elif os.path.isfile(dest):
            if (info.get('size') != os.path.getsize(dest)
                    or info.get('checksum') != _get_checksum(manifest, dest)
                    or info.get('mode') != os.stat(dest).st_mode):
                modified_files[path] = info
        else:
            modified_files[path] = info
    return modified_files
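# _download_file fetches a single manifest entry: it copies from --local-dir
# when that option is given, otherwise it pulls <base-url>/<tag>/<name>/<file>
# with wget or httpdl, and finally mirrors the result into --save-dir when
# requested.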
def _download_file(options, fileName):
    dest = "%s/%s" % (options.dest_dir, fileName)
    dest_dir = os.path.dirname(dest)
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)

    if options.local_dir:
        src = "%s/%s/%s/%s" % (options.local_dir, options.tag, options.name, fileName)
        print("Copying %s from %s" % (fileName, src))
        shutil.copy(src, dest)
    else:
        url = "%s/%s/%s/%s" % (options.base_url, options.tag, options.name, fileName)
        print("Downloading %s from %s" % (fileName, url))

        if os.path.isfile(dest):

        if options.wget:
            command = "cd %s; wget -q %s" % (dest_dir, url)
        else:
            parts = urllib.parse.urlparse(url)
            host = parts.netloc
            request = parts.path
            status = httpdl(host, request, localpath=dest_dir,
                            outputfilename=os.path.basename(dest),
                            verbose=options.verbose,
                            chunk_size=options.chunk_size)
            if status:
                print("Error downloading", dest, ": return code =", status)

    if options.save_dir:
        src = "%s/%s" % (options.dest_dir, fileName)
        dest = "%s/%s/%s/%s" % (options.save_dir, options.tag, options.name, fileName)
        destDir = os.path.dirname(dest)
        if not os.path.isdir(destDir):
            os.makedirs(destDir)
        shutil.copy(src, dest)
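# _download_files handles the whole set of out-of-date entries: it copies from
# --local-dir when given, batches regular files through a single wget -i run
# when --wget is set, and otherwise downloads each file with httpdl; symlink
# entries are recreated with os.symlink rather than downloaded.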
def _download_files(options, file_list):
    if options.local_dir:
        for path, info in file_list.items():
            dest = "%s/%s" % (options.dest_dir, path)
            dest_dir = os.path.dirname(dest)
            if not os.path.isdir(dest_dir):
                os.makedirs(dest_dir)
            if info.get('checksum'):
                src = "%s/%s/%s/%s" % (options.local_dir, info["tag"], options.name, path)
                shutil.copy(src, dest)
                os.chmod(dest, info["mode"])
            else:
                src = info['symlink']
                os.symlink(src, dest)

    elif options.wget:
        if not os.path.isdir(options.dest_dir):
            os.makedirs(options.dest_dir)
        with tempfile.NamedTemporaryFile(prefix="manifest-") as txt_file:
            for path, info in file_list.items():
                if info.get('checksum'):
                    txt_file.write("%s\n" % path)
                else:
                    dest = "%s/%s" % (options.dest_dir, path)
                    src = info['symlink']
                    os.symlink(src, dest)
            command = "cd %s; wget -x -nH -i %s --cut-dirs=3 --base=%s/%s/%s/" % (options.dest_dir, txt_file.name, options.base_url, info["tag"], options.name)

            for path, info in file_list.items():
                if info.get('checksum'):
                    dest = "%s/%s" % (options.dest_dir, path)
                    os.chmod(dest, info["mode"])

    else:
        for path, info in file_list.items():
            dest = "%s/%s" % (options.dest_dir, path)
            dest_dir = os.path.dirname(dest)
            if not os.path.isdir(dest_dir):
                os.makedirs(dest_dir)

            if info.get('checksum'):
                if os.path.islink(dest) or os.path.exists(dest):

                url = "%s/%s/%s/%s" % (options.base_url, info["tag"], options.name, path)

                print("Downloading %s from %s" % (path, url))
                parts = urllib.parse.urlparse(url)
                host = parts.netloc
                request = parts.path
                status = httpdl(host, request, localpath=dest_dir,
                                outputfilename=os.path.basename(dest),
                                verbose=options.verbose,
                                chunk_size=options.chunk_size)
                if not status:
                    os.chmod(dest, info["mode"])
                else:
                    print("Error downloading", dest, ": return code =", status)
            else:
                src = info['symlink']

                print("Making symlink %s -> %s" % (dest, src))
                if os.path.islink(dest) or os.path.exists(dest):

                os.symlink(src, dest)
if __name__ == "__main__":