"""Source code for plate_model_manager.download_utils."""

import json
import os
from datetime import datetime, timedelta
from pathlib import Path

from . import network_requests, network_utils

EXPIRY_TIME_FORMAT = "%Y/%m/%d, %H:%M:%S"


def check_redownload_need(metadata_file, url):
    """Check the metadata file and decide if a re-download is necessary.

    :param metadata_file: metadata file path
    :param url: url for the target file

    :returns download_flag, etag: a flag indicating if re-download is necessary
        and the old etag from the meta file.
    """
    if not os.path.isfile(metadata_file):
        return True, None  # no metadata file at all -- must download

    try:
        with open(metadata_file, "r") as f:
            meta = json.load(f)
    except (json.JSONDecodeError, OSError):
        # corrupt or unreadable metadata -- treat the local copy as stale
        return True, None

    download_flag = False

    # if the data url has changed (or is missing), re-download
    if meta.get("url") != url:
        download_flag = True

    # if the url is the same, now check the expiry date
    if not download_flag:
        meta_expiry = meta.get("expiry")
        if meta_expiry is None:
            download_flag = True  # no expiry date in metafile
        else:
            try:
                expiry_date = datetime.strptime(meta_expiry, EXPIRY_TIME_FORMAT)
                if datetime.now() > expiry_date:
                    download_flag = True  # expired
            except ValueError:
                download_flag = True  # invalid expiry date

    meta_etag = meta.get("etag")

    return download_flag, meta_etag
def download_file(url, metadata_file, dst_path, expire_hours=12, large_file_hint=False):
    """Download a file from "url", save the file in "dst_path" and write the metadata.

    A metadata file will also be created for the downloaded file.

    :param url: the url to the target file
    :param metadata_file: the path to the metadata file
    :param dst_path: the folder path to save the downloaded file
    :param expire_hours: hours before the local copy is considered stale
        and the remote is re-checked (default 12)
    :param large_file_hint: set True to check the remote size/etag up front,
        e.g. when the file is expected to be large
    """
    print(f"downloading {url}")
    download_flag, etag = check_redownload_need(metadata_file, url)

    file_size = None
    update_meta = False
    new_etag = etag

    if (download_flag and etag is not None) or large_file_hint:
        # check the file size and etag before (possibly) downloading a large file
        headers = network_utils.get_headers(url)
        file_size = network_utils.get_content_length(headers)
        new_etag = network_utils.get_etag(headers)
        if etag is not None and etag == new_etag:
            # remote content unchanged -- skip the download, just refresh metadata
            download_flag = False
            update_meta = True

    # only redownload when necessary
    if download_flag:
        if file_size and file_size > 20 * 1000 * 1000:
            new_etag = network_requests.fetch_large_file(
                url, dst_path, filesize=file_size, auto_unzip=True, check_etag=False
            )
        else:
            new_etag = network_requests.fetch_file(
                url,
                dst_path,
                etag=etag,
                auto_unzip=True,
            )
    else:
        print(
            "The local file(s) is/are still good. Will not download again at this moment."
        )

    if etag != new_etag or new_etag is None or update_meta:
        # update metadata file
        metadata = {
            "url": url,
            "expiry": (datetime.now() + timedelta(hours=expire_hours)).strftime(
                EXPIRY_TIME_FORMAT
            ),
            "etag": new_etag,
        }
        # use pathlib instead of manual "/" splitting so Windows paths
        # (backslash separators) are handled correctly
        Path(metadata_file).parent.mkdir(parents=True, exist_ok=True)
        with open(metadata_file, "w+") as f:
            json.dump(metadata, f)