Last active
March 31, 2021 10:46
-
-
Save PatrykGala/6b34a3f7a9bb3189ea8adc87835b97c1 to your computer and use it in GitHub Desktop.
Upload a large file to GCS with requests and the resumable upload API (uploader: Python 2.7; status checker: Python 3.7)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#python3.7 | |
import requests | |
from datetime import datetime | |
import time | |
# Total size of the object being uploaded, in bytes.
file_size = 2194418361937  # TODO size in bytes
# URL that is polled for upload status.
LOCATION = "https://storage.googleapis.com/upload/storage/v1/b/sc-9369-dataengineering-prod-qeppo-arch/o?uploadType=resumable&name=****"  # TODO change location upload

# Poll the upload every 5 seconds and print "<timestamp> <percent>% <remaining>GB"
# until the server reports that the whole object has been received.
while True:
    # An empty PUT with "Content-Range: bytes */<total>" is a status query.
    # The total must be the actual number, not the literal text "file_size".
    res = requests.put(
        url=LOCATION,
        headers={
            'Content-Length': '0',
            "Content-Range": "bytes */{}".format(file_size),
        })
    res.raise_for_status()

    if res.status_code in (200, 201):
        # Upload already finalized: the response carries no Range header.
        uploaded = file_size
    else:
        # "Range: bytes=0-N" gives the index of the last stored byte, so N + 1
        # bytes are stored. The header is absent when nothing is stored yet.
        range_header = res.headers.get('range')
        uploaded = int(range_header.rpartition('-')[2]) + 1 if range_header else 0

    percentage = uploaded / file_size * 100
    print((datetime.now().strftime("%d-%m-%y %H:%M ") + ("%.1f" % percentage) + "%") + " " + (
        "%.2f" % ((file_size - uploaded) / 1024 / 1024 / 1024)) + "GB")

    if uploaded >= file_size:
        # Nothing left to wait for.
        break
    time.sleep(5)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#python 2.7 | |
from datetime import datetime | |
from requests.adapters import HTTPAdapter | |
from urllib3 import Retry | |
import requests | |
# Bytes sent per PUT request: the larger of 256 KiB and 100 MiB.
# NOTE(review): 256 KiB is presumably the resumable-upload chunk granularity
# required by GCS - confirm against the GCS documentation.
CHUNK_SIZE = max(256 * 1024, 100 * 1024 * 1024)
# Path of the local file that is opened and uploaded.
FILENAME = 'filename'  # TODO
# Bearer token placed in the Authorization header of the initiating request.
TOKEN = "TOKEN"  # TODO
# Total object size in bytes; used for Content-Range totals and progress output.
file_size = 2194418361937  # TODO
def read_in_chunks(file_object, chunk_size=CHUNK_SIZE):
    """Yield successive pieces of *file_object* together with their byte offsets.

    Each item is a tuple ``(data, first_byte, last_byte)`` where ``data`` is the
    result of one ``file_object.read(chunk_size)`` call and ``first_byte`` /
    ``last_byte`` are the inclusive, zero-based positions of that piece counted
    from where reading started.  Iteration stops at the first empty read.
    """
    first_byte = 0
    data = file_object.read(chunk_size)
    while data:
        last_byte = first_byte + len(data) - 1
        yield data, first_byte, last_byte
        # The next piece starts right after the one just handed out.
        first_byte = last_byte + 1
        data = file_object.read(chunk_size)
# Last percentage that was printed, formatted to one decimal place.
# '-1' never matches a real value, so the first call always prints.
progress_percentage = '-1'


def progress(percentage):
    """Print a timestamped progress line when the displayed value changes.

    ``percentage`` is a number; it is rendered with one decimal place.  A line
    of the form ``"dd-mm-yy HH:MM <value>%"`` is printed only if that rendered
    value differs from the one printed last, so repeated calls with the same
    (rounded) percentage produce no duplicate output.
    """
    global progress_percentage
    # Compare like with like: the stored value is the formatted string, so the
    # incoming number has to be formatted before the comparison.
    formatted = "%.1f" % percentage
    if formatted != progress_percentage:
        # Single-argument print(...) behaves the same on Python 2.7 and 3.
        print(datetime.now().strftime("%d-%m-%y %H:%M ") + formatted + "%")
        progress_percentage = formatted
# Transport-level retry policy: up to 5 retries each for connect errors, read
# errors and HTTP 500 responses; raise once the status retries are exhausted.
retry_strategy = Retry(connect=5, read=5, status=5, status_forcelist=[500], raise_on_status=True)
adapter = HTTPAdapter(max_retries=retry_strategy)

# Upload FILENAME in CHUNK_SIZE pieces through a resumable upload session.
# One Session is shared by all chunk requests so the retry adapter is mounted
# once and connections are reused instead of being rebuilt for every chunk.
with open(FILENAME, 'rb') as f, requests.Session() as session:
    session.mount("https://", adapter)

    # Start the resumable session; the object name is prefixed with a timestamp.
    res = requests.post(
        url='https://storage.googleapis.com/upload/storage/v1/b/sc-9369-dataengineering-prod-qeppo-arch/o?uploadType=resumable&name={path}_{filename}'.format(
            path=datetime.now().strftime("%d_%m_%y_%H_%M"), filename='filename'),  # TODO change filename
        headers={
            'X-Upload-Content-Type': 'text/csv',
            "Authorization": "Bearer " + TOKEN,
        })
    res.raise_for_status()
    # The Location header is the URL that every chunk is PUT to.
    location = res.headers['location']
    print(location)

    for piece, first_byte, last_byte in read_in_chunks(f):
        # Inclusive byte positions of this piece, followed by the total size.
        content_range = "bytes {first_byte}-{last_byte}/{file_size}".format(
            first_byte=first_byte, last_byte=last_byte, file_size=file_size)
        # Progress before sending: everything up to first_byte is already sent.
        progress(float(first_byte) / float(file_size) * float(100))
        response = session.put(location, data=piece, headers={
            "Content-Length": str(len(piece)),
            "Content-Range": content_range
        })
        response.raise_for_status()
        # Progress after sending: bytes 0..last_byte are sent, i.e. last_byte + 1.
        progress(float(last_byte + 1) / float(file_size) * float(100))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment