Skip to content

Instantly share code, notes, and snippets.

@hang15
Last active August 5, 2020 14:28
Show Gist options
  • Save hang15/cbf908d44db1e9ec55f4fc1df59e4a2b to your computer and use it in GitHub Desktop.
Save hang15/cbf908d44db1e9ec55f4fc1df59e4a2b to your computer and use it in GitHub Desktop.
def get_object_bodylines(s3_object, offset):
    """Fetch the object's content from byte ``offset`` onward as a BodyLines.

    Issues a ranged ``get`` on ``s3_object`` (``bytes=<offset>-``, i.e. from
    ``offset`` to the end) and wraps the response's ``'Body'`` stream.

    The returned BodyLines is created with its default initial offset, so its
    ``offset`` attribute counts bytes relative to ``offset``, not relative to
    the start of the object.
    """
    byte_range = f'bytes={offset}-'
    response = s3_object.get(Range=byte_range)
    stream: botocore.response.StreamingBody = response['Body']
    return BodyLines(stream)
class BodyLines:
    """Iterate a chunked byte stream line by line while counting bytes consumed.

    ``offset`` starts at ``initial_offset`` and grows by the encoded byte
    length of every line handed out by :meth:`iter_lines`, so after stopping
    iteration early it tells how many bytes of the stream were consumed as
    complete yielded lines.
    """

    # The annotation is quoted so that defining this class does not require
    # ``botocore`` to be imported at class-definition time.
    def __init__(self, body: 'botocore.response.StreamingBody', initial_offset=0):
        """Wrap ``body``.

        Args:
            body: Stream object exposing ``iter_chunks(chunk_size)`` that
                yields ``bytes`` chunks.
            initial_offset: Starting value for the ``offset`` byte counter.
        """
        self.body = body
        self.offset = initial_offset

    def iter_lines(self, chunk_size=1024):
        """Return an iterator to yield decoded lines from the raw stream.

        Chunks of up to ``chunk_size`` bytes are read from the stream and
        split on line boundaries (``\\n``, ``\\r`` or ``\\r\\n``). Line endings
        are kept on the yielded strings. The last piece of every chunk is
        held back until more data (or end of stream) arrives, because it may
        be an incomplete line or a ``\\r`` whose ``\\n`` is in the next chunk.

        ``self.offset`` is advanced by the byte length of a line just before
        that line is yielded.

        Args:
            chunk_size: Number of bytes requested per chunk from the stream.

        Yields:
            str: Each line decoded as UTF-8, including its line terminator.

        Raises:
            UnicodeDecodeError: If a line is not valid UTF-8.
        """
        pending = b''
        for chunk in self.body.iter_chunks(chunk_size):
            if not chunk:
                # An empty chunk with nothing pending would make splitlines()
                # return [] and the lines[-1] lookup below raise IndexError.
                continue
            lines = (pending + chunk).splitlines(True)
            for line in lines[:-1]:
                self.offset += len(line)
                yield line.decode('utf-8')
            # Always carry the final piece over: it may be a partial line.
            pending = lines[-1]
        if pending:
            # End of stream: whatever is left is the final line.
            self.offset += len(pending)
            yield pending.decode('utf-8')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment