import json
import os
import re
import sys
import time

from urllib.parse import urljoin, urlsplit, urlunsplit

# Note: httpdl() and the HTTP session object used by the methods below are
# provided by other modules in this package; those imports are not part of
# this excerpt.
def base_url(url):
    parts = urlsplit(url)
    return urlunsplit((parts.scheme, parts.netloc, parts.path, None, None))
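For instance, applied to the LUTs listing URL used in __main__ below, base_url() drops the query string (a quick illustrative check):

print(base_url('https://oceandata.sci.gsfc.nasa.gov/Ancillary/LUTs/?format=json'))
# -> https://oceandata.sci.gsfc.nasa.gov/Ancillary/LUTs/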
def full_url(url, link):   # parameter names inferred from the call full_url(url, link['href']) in get_links() below
    """
    Add query to urljoin() results.
    """
    # ... ('base' is built from urljoin() in lines omitted from this excerpt)
    scheme, netloc, path, query, fragment = urlsplit(base)
    query = urlsplit(url).query
    return urlunsplit((scheme, netloc, path, query, None))
# docstring of a page-detection helper (its definition is omitted from this excerpt):
    """
    Make the dangerous assumption that URLs pointing
    to another web page always end in '/'.
    """
def retry(func, *args, **kwargs):
    """
    Retry specified function call after a short delay.
    """
    ntries = kwargs.get('ntries')
    delay = int(5 + (30. * (1. / (float(ntries) + 1.))))
    if kwargs.get('verbose'):
        print('Sleeping {}s; {} tries left.'.format(delay, ntries - 1))
    time.sleep(delay)          # pause before retrying
    kwargs['ntries'] = ntries - 1
    return func(*args, **kwargs)
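A hedged usage sketch, not part of this file: retry() passes ntries and verbose straight back to the wrapped callable, so that callable decides when to give up. flaky_task() below is hypothetical, purely to illustrate the calling convention.

import random

def flaky_task(ntries=5, verbose=0):
    # hypothetical worker: fail at random, then hand control back to retry()
    if random.random() < 0.5:
        if ntries > 1:
            return retry(flaky_task, ntries=ntries, verbose=verbose)
        raise RuntimeError('out of retries')
    return 'ok'

try:
    print(flaky_task(ntries=3, verbose=1))
except RuntimeError as exc:
    print(exc)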
def set_mtime(filepath, mtime):
    """
    Set modification time for specified file.
    Set access time to "now".
    """
    atime = time.time()
    try:
        os.utime(filepath, times=(atime, mtime))
    except TypeError:  # older Python without the 'times' keyword
        os.utime(filepath, (atime, mtime))
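Illustrative only: after writing a local file, set_mtime() can stamp it with the remote timestamp carried in a link dictionary (see linkdict() just below), which keeps the mtime comparison in needs_download() meaningful. The file and timestamp here are throwaway demo values.

with open('/tmp/example.dat', 'w') as out:   # throwaway file for the demo
    out.write('placeholder')
set_mtime('/tmp/example.dat', 1700000000.0)  # 1700000000 ~ 2023-11-14 UTC
print(os.path.getmtime('/tmp/example.dat'))  # -> 1700000000.0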
def linkdict(rows):
    """
    Each link in list is a dictionary describing a remote file:
        link['href']  = URL pointing to file
        link['mtime'] = timestamp as seconds since the epoch
        link['size']  = size in bytes
    """
    keys = ['href', 'mtime', 'size']
    linklist = []
    for row in rows:
        link = dict(list(zip(keys, row)))
        linklist.append(link)
    return linklist
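An illustrative round trip, assuming each row arrives as an (href, mtime, size) triplet in that order (matching the key list above) with mtime already in epoch seconds; the rows are made up.

rows = [
    ['files/one.hdf', 1700000000.0, 204800],
    ['files/two.hdf', 1700003600.0,  51200],
]
for entry in linkdict(rows):
    print(entry['href'], entry['mtime'], entry['size'])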
# timestamp-conversion helper (its def line is omitted here):
    """
    Format remote file timestamp as seconds since the epoch.
    """
    urltime = time.strptime(mtime, "%Y-%m-%d %H:%M:%S")
    return time.mktime(urltime)
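A quick check of the conversion: time.mktime() interprets the parsed struct_time in local time, so the resulting epoch value shifts with the machine's timezone.

stamp = time.mktime(time.strptime('2024-07-01 12:00:00', '%Y-%m-%d %H:%M:%S'))
print(stamp)   # seconds since the epoch; exact value depends on local timezone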
def getlinks_json(content):
    return linkdict(json.loads(content.decode('utf-8'))['rows'])
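A minimal sketch of the payload shape getlinks_json() expects: a JSON object whose 'rows' entries follow the (href, mtime, size) order used by linkdict(). The sample bytes below are made up for illustration.

sample = b'{"rows": [["files/one.hdf", 1700000000, 204800]]}'
links = getlinks_json(sample)
print(links[0]['href'], links[0]['size'])   # -> files/one.hdf 204800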
def needs_download(link, filepath, check_times=False):
    """
    Returns False if filepath is present and size matches remote url;
    True otherwise. Optionally check timestamp as well.
    """
    if not os.path.isfile(filepath):
        return True
    diffsize = os.path.getsize(filepath) != link['size']
    if not check_times:
        return diffsize
    older = os.path.getmtime(filepath) < link['mtime']
    return diffsize or older
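Illustrative use with a hypothetical link entry: a missing or size-mismatched local file means "download it"; with check_times=True a stale local mtime also triggers a re-download.

link = {'href': 'https://example.com/luts/table.hdf',
        'mtime': 1700000000.0, 'size': 2048}
if needs_download(link, '/tmp/table.hdf', check_times=True):
    print('local copy missing or stale; fetch it')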
# response-check helpers (their def lines are omitted here):
    return response and ('json' in response.headers.get('Content-Type'))

    return response and (response.status < 400)
    # methods of the downloader class used as sessionUtil in __main__ below
    # (its class statement is not shown in this excerpt):
    def __init__(self, timeout=5, max_tries=5, verbose=0, clobber=False):
    def download_file(self, url, filepath):
        try:
            parts = urlsplit(url)
            outputdir = os.path.dirname(filepath)
            status = httpdl(parts.netloc, parts.path, localpath=outputdir,
                            # additional httpdl() keyword arguments elided in this excerpt
                            )
            if status:   # non-zero return treated as failure here
                print('Error downloading {}'.format(filepath))
        except Exception as e:
            print('Exception: {:}'.format(e))
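For context, urlsplit() carves the URL into the separate server and request pieces that httpdl() takes as its first two arguments (see its signature at the bottom of this page); the file name here is illustrative.

parts = urlsplit('https://oceandata.sci.gsfc.nasa.gov/Ancillary/LUTs/some_file.hdf')
print(parts.netloc)   # -> oceandata.sci.gsfc.nasa.gov
print(parts.path)     # -> /Ancillary/LUTs/some_file.hdf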
    def get_links(self, url, regex=''):
        """
        Returns a unique set of links from a given url.
        Optionally specify regex to filter for acceptable files;
        default is to list only links starting with url.
        """
        with session.get(url, stream=True, timeout=self.timeout) as response:   # 'session' is set up in omitted lines
            linklist = getlinks_json(response.content)   # assumption: the omitted lines parse the JSON listing this way

        # make every href fully qualified
        for link in linklist:
            link['href'] = full_url(url, link['href'])

        if regex:
            regex = re.compile(regex)
            linklist = [link for link in linklist
                        if regex.search(link['href'])]
        else:
            linklist = [link for link in linklist
                        if base_url(url) in link['href']]
        return linklist
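A hedged calling sketch: the class name SessionUtils is assumed from the sessionUtil instance in __main__ below, and the regex simply keeps .hdf links for illustration.

sessionUtil = SessionUtils(timeout=10, verbose=1)   # class name assumed, not shown in this excerpt
hdf_links = sessionUtil.get_links(
    'https://oceandata.sci.gsfc.nasa.gov/Ancillary/LUTs/?format=json', regex=r'\.hdf$')
for link in hdf_links:
    print(link['href'], link['size'])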
    def download_allfiles(self, url, dirpath, regex='', check_times=False,
                          clobber=False, dry_run=False):
        """
        Downloads all available files from a remote url into a local dirpath.
        Default is to download only if local file doesn't match remote size;
        set clobber=True to always download.
        """
        downloaded = []
        if not os.path.exists(dirpath) and not dry_run:
            os.makedirs(dirpath)

        all_links = self.get_links(url, regex=regex)
        for link in all_links:
            f = os.path.basename(link['href'])
            filepath = os.path.join(dirpath, f)
            if clobber or needs_download(link, filepath,
                                         check_times=check_times):
                if not dry_run:   # inferred: dry runs only report, they do not fetch
                    self.download_file(link['href'], filepath)
                downloaded.append(filepath)
        return downloaded
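End-to-end sketch under the same class-name assumption: mirror the .hdf files from the LUTs listing into a local directory, skipping files whose local size already matches; dry_run=True would only report what needs fetching.

sessionUtil = SessionUtils(verbose=1)   # class name assumed (see note above)
fetched = sessionUtil.download_allfiles(
    'https://oceandata.sci.gsfc.nasa.gov/Ancillary/LUTs/?format=json',
    '/tmp/luts', regex=r'\.hdf$', check_times=True)
print('{} file(s) downloaded'.format(len(fetched)))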
if __name__ == '__main__':
    if len(sys.argv) > 1:
        url = sys.argv[1]   # take the listing URL from the command line when given
    else:
        url = 'https://oceandata.sci.gsfc.nasa.gov/Ancillary/LUTs/?format=json'

    # sessionUtil is constructed in lines omitted from this excerpt
    links = sessionUtil.get_links(url)
Related definitions:

def getSession(verbose=0, ntries=5)
def download_file(self, url, filepath)
def needs_download(link, filepath, check_times=False)
def getlinks_json(content)
def __init__(self, timeout=5, max_tries=5, verbose=0, clobber=False)
def set_mtime(filepath, mtime)
def retry(func, *args, **kwargs)
def download_allfiles(self, url, dirpath, regex='', check_times=False, clobber=False, dry_run=False)
def get_links(self, url, regex='')
def httpdl(server, request, localpath='.', outputfilename=None, ntries=5, uncompress=False, timeout=30., verbose=0, force_download=False, chunk_size=DEFAULT_CHUNK_SIZE)