- 
      
- 
        Save wy193777/0e2a4932e81afc6aa4c8f7a2984f34e2 to your computer and use it in GitHub Desktop. 
| """ | |
| This is free and unencumbered software released into the public domain. | |
| Anyone is free to copy, modify, publish, use, compile, sell, or | |
| distribute this software, either in source code form or as a compiled | |
| binary, for any purpose, commercial or non-commercial, and by any | |
| means. | |
| In jurisdictions that recognize copyright laws, the author or authors | |
| of this software dedicate any and all copyright interest in the | |
| software to the public domain. We make this dedication for the benefit | |
| of the public at large and to the detriment of our heirs and | |
| successors. We intend this dedication to be an overt act of | |
| relinquishment in perpetuity of all present and future rights to this | |
| software under copyright law. | |
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
| EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
| MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
| IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
| OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
| ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
| OTHER DEALINGS IN THE SOFTWARE. | |
| For more information, please refer to <http://unlicense.org/> | |
| """ | |
| import requests | |
| from tqdm import tqdm | |
def download_from_url(url, dst):
    """
    Download ``url`` to ``dst``, resuming a partial download when possible.

    The remote size is read from a HEAD request. If ``dst`` already holds at
    least that many bytes nothing is downloaded; otherwise only the missing
    tail is requested with an HTTP ``Range`` header and appended to ``dst``.

    @param: url to download file
    @param: dst place to put the file
    @return: size of the remote file in bytes, or the number of bytes written
             to ``dst`` when the server does not report a Content-Length
    @raise: requests.HTTPError when the server answers with an error status
    """
    # Imported locally because the surrounding file does not import os.
    import os

    # requests.head() does not follow redirects unless asked to; without this
    # the headers inspected below could belong to a redirect response.
    head = requests.head(url, allow_redirects=True)
    head.raise_for_status()
    length = head.headers.get('Content-Length')
    file_size = int(length) if length is not None else None

    first_byte = os.path.getsize(dst) if os.path.exists(dst) else 0
    if file_size is None:
        # Without a known size a partial file cannot be validated: start over.
        first_byte = 0
    elif first_byte >= file_size:
        return file_size

    # Open-ended range: "everything from first_byte on". The end position of
    # a byte range is inclusive, so using file_size as the end was off by one.
    header = {"Range": "bytes=%d-" % first_byte} if first_byte else {}
    with requests.get(url, headers=header, stream=True) as req:
        # Never write an error page (404, 416, ...) into the target file.
        req.raise_for_status()
        # Only 206 Partial Content means the Range was honoured. A plain 200
        # carries the whole file, so the partial data must be overwritten
        # instead of appended to.
        if req.status_code == 206:
            mode = 'ab'
        else:
            mode = 'wb'
            first_byte = 0
        progress = tqdm(
            total=file_size, initial=first_byte,
            unit='B', unit_scale=True, desc=url.split('/')[-1])
        with open(dst, mode) as f, progress as pbar:
            for chunk in req.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    # Chunks may be shorter than the requested chunk_size.
                    pbar.update(len(chunk))
    return file_size if file_size is not None else os.path.getsize(dst)
import requests
from tqdm import tqdm
import os
def download_from_url(url, dst):
    """
    Download ``url`` to ``dst``, resuming a partial download when possible.

    The remote size is read from a HEAD request. If ``dst`` already holds at
    least that many bytes nothing is downloaded; otherwise only the missing
    tail is requested with an HTTP ``Range`` header and appended to ``dst``.

    @param: url to download file
    @param: dst place to put the file
    @return: size of the remote file in bytes, or the number of bytes written
             to ``dst`` when the server does not report a Content-Length
    @raise: requests.HTTPError when the server answers with an error status
    """
    # requests.head() does not follow redirects unless asked to; without this
    # the headers inspected below could belong to a redirect response.
    head = requests.head(url, allow_redirects=True)
    head.raise_for_status()
    length = head.headers.get("Content-Length")
    file_size = int(length) if length is not None else None

    first_byte = os.path.getsize(dst) if os.path.exists(dst) else 0
    if file_size is None:
        # Without a known size a partial file cannot be validated: start over.
        first_byte = 0
    elif first_byte >= file_size:
        return file_size

    # Open-ended range: "everything from first_byte on". The end position of
    # a byte range is inclusive, so using file_size as the end was off by one.
    header = {"Range": "bytes=%d-" % first_byte} if first_byte else {}
    with requests.get(url, headers=header, stream=True) as req:
        # Never write an error page (404, 416, ...) into the target file.
        req.raise_for_status()
        # Only 206 Partial Content means the Range was honoured. A plain 200
        # carries the whole file, so the partial data must be overwritten
        # instead of appended to.
        if req.status_code == 206:
            mode = 'ab'
        else:
            mode = 'wb'
            first_byte = 0
        progress = tqdm(
            total=file_size, initial=first_byte,
            unit='B', unit_scale=True, desc=url.split('/')[-1])
        with open(dst, mode) as f, progress as pbar:
            for chunk in req.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    # Chunks may be shorter than the requested chunk_size.
                    pbar.update(len(chunk))
    return file_size if file_size is not None else os.path.getsize(dst)
Because I need to log in with a csrfmiddlewaretoken before downloading, I use the following code:
def download_from_url(session_requests, url, destination_folder):
    """
    Download ``url`` into ``destination_folder`` through an existing session,
    resuming a partially downloaded file when the server supports it.

    @param: session_requests session object used to issue the requests
    @param: url to download file
    @param: destination_folder folder to put the file in; the file name is
            taken from the Content-Disposition response header
    @return: size of the file in bytes as reported by Content-Length
    @raise: KeyError when Content-Disposition or Content-Length is missing,
            IndexError when Content-Disposition carries no filename
    """
    # The first request is only needed for its headers. It is streamed so the
    # body is not fetched, and closed right away to release the connection.
    result = session_requests.get(
        url,
        stream=True,
        headers=dict(referer=url)
    )
    try:
        disposition = result.headers['content-disposition']
        size = int(result.headers["Content-Length"])
    finally:
        result.close()

    # Accept both filename=name and filename="name", and stop at the closing
    # quote or the next ";" so no quotes or extra parameters end up in the name.
    name = re.findall(r'filename="?([^";]+)', disposition, re.IGNORECASE)[0]
    # The header is server-controlled: keep only the final path component so
    # a value such as "../../x" cannot escape destination_folder.
    name = os.path.basename(name.strip())

    dst = os.path.join(destination_folder, name)
    first_byte = os.path.getsize(dst) if os.path.isfile(dst) else 0
    if first_byte >= size:
        return size

    # Open-ended range: "everything from first_byte on". The end position of
    # a byte range is inclusive, so using the size as the end was off by one.
    header = {"Range": "bytes=%d-" % first_byte} if first_byte else {}
    with session_requests.get(url, headers=header, stream=True) as req:
        # Never write an error page (404, 416, ...) into the target file.
        req.raise_for_status()
        # Only 206 Partial Content means the Range was honoured. A plain 200
        # carries the whole file, so the partial data must be overwritten
        # instead of appended to.
        if req.status_code == 206:
            mode = 'ab'
        else:
            mode = 'wb'
            first_byte = 0
        progress = tqdm(
            total=size,
            initial=first_byte,
            unit='B',
            unit_scale=True,
            desc=name)
        with open(dst, mode) as f, progress as pbar:
            for chunk in req.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    # Chunks may be shorter than the requested chunk_size.
                    pbar.update(len(chunk))
    return size


# where session_requests is:
# Session object that is later handed to download_from_url as its first
# argument; the (elided) login steps below are meant to run on it first.
session_requests = requests.session()
# authentication and login section
# ...
# ...
download_from_url(session_requests, url, folder_to_download_to)

Add unit_divisor=1024 to pbar = tqdm(...) to get precise file size.
Since you already imported requests, I would replace the urlopen call with a requests call: