Created
September 10, 2022 08:19
-
-
Save aymanfarhat/c41873f6842dd060b8d4d92b1106c152 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright 2022 Google LLC.
# SPDX-License-Identifier: Apache-2.0
import os | |
import psutil | |
from google.cloud.storage import Client | |
def get_mem_mb():
    """Return the current process's memory usage (RSS) in mebibytes."""
    bytes_per_mib = 1024 * 1024
    # psutil.Process() with no pid argument refers to the calling process.
    rss_bytes = psutil.Process().memory_info().rss
    return rss_bytes / bytes_per_mib
class ChunkParser:
    """Write-only sink that counts the chunks and newline bytes passed to it.

    An instance can be handed to any API that streams data by calling
    ``write(chunk)`` on a file-like object. Each chunk is inspected and then
    dropped, so nothing is accumulated by this class.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, fileobj):
        """Create a parser wrapping *fileobj*.

        Args:
            fileobj: The underlying file object. It is stored but not
                written to by this class.
        """
        self._fileobj = fileobj
        # Number of times write() has been called.
        self.chunk_count = 0
        # Running total of b'\n' bytes seen across all chunks.
        self.line_breaks_count = 0

    def write(self, chunk):
        """Record one chunk and count the line breaks it contains.

        Args:
            chunk: A bytes-like object supporting ``count`` and ``len``.

        Returns:
            The length of *chunk*, mirroring the return value of a regular
            binary file's ``write`` so callers that check it keep working.
        """
        # The chunk is deliberately NOT forwarded to self._fileobj: the data
        # is only scanned, never stored.
        self.chunk_count += 1
        self.line_breaks_count += chunk.count(b'\n')
        return len(chunk)
# Demo: stream a Cloud Storage object through ChunkParser and report the
# process memory before and after, together with the chunk/line-break totals.
print(f'Allocated memory on start {get_mem_mb()}')

client = Client()
bucket = client.get_bucket('your-bucket')
blob = bucket.blob('inputs/data/compressed.csv')

# open()'s third positional parameter is `buffering`, not OS-level open flags,
# so the previous os.O_NONBLOCK argument was silently used as a buffer size
# (and is not defined on every platform). Use the default buffering instead.
with open('virtua_file', 'wb') as blob_file:
    parser = ChunkParser(blob_file)
    # download_to_file only needs an object with write(); every downloaded
    # chunk is handed to parser.write().
    blob.download_to_file(parser)

print(f'Total chunks {parser.chunk_count}')
print(f'Total line breaks {parser.line_breaks_count}')
print(f'Allocated memory on end {get_mem_mb()}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment