Last active
January 27, 2022 12:06
-
-
Save ftfarias/91e4e2852b409e26109429447944c341 to your computer and use it in GitHub Desktop.
Small but practical functions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Parse an ISO-8601 style timestamp with fractional seconds and a 'Z' suffix.
# The 'Z' is matched as a literal character (the format has no %z), so the
# resulting datetime is naive: it carries no tzinfo.
from datetime import datetime

datetime.strptime('2019-04-17T09:02:16.00428Z', '%Y-%m-%dT%H:%M:%S.%fZ')
import itertools | |
def grouper(iterable, n):
    """Yield consecutive tuples of at most ``n`` items taken from ``iterable``.

    Every tuple holds ``n`` items except possibly the last one, which holds
    whatever remains. Iteration ends as soon as a slice comes back empty, so
    an exhausted (or empty) input produces no tuples at all.

    Args:
        iterable: Any iterable; it is consumed lazily, one chunk at a time.
        n: Maximum number of items per yielded tuple.

    Yields:
        Tuples of items in their original order.
    """
    source = iter(iterable)
    # Two-argument iter(): keep calling the slicer until it returns the
    # empty-tuple sentinel, i.e. until the source has nothing left.
    yield from iter(lambda: tuple(itertools.islice(source, n)), ())
# Example: list(grouper(range(102), 5)) returns
# [(0, 1, 2, 3, 4),
#  (5, 6, 7, 8, 9),
#  (10, 11, 12, 13, 14),
#  (15, 16, 17, 18, 19),
#  ...
#  (90, 91, 92, 93, 94),
#  (95, 96, 97, 98, 99),
#  (100, 101)]
def binary_search(data, item):
    """Return an index of ``item`` in ``data``, or ``-1`` if it is not found.

    The search halves a closed index interval on each step, going left when
    ``item`` compares smaller than the probed element and right when it
    compares larger. The result is therefore only meaningful when ``data``
    is sorted in ascending order.

    Args:
        data: Indexable sequence supporting ``len()``.
        item: Value to look for; compared with ``<`` and ``>``.

    Returns:
        The index of a matching element (not necessarily the first one when
        duplicates exist), or ``-1`` when no element matches.
    """
    lo = 0
    hi = len(data) - 1
    while lo <= hi:
        mid = (lo + hi) // 2
        probe = data[mid]
        if item < probe:
            # Target can only be in the lower half.
            hi = mid - 1
            continue
        if item > probe:
            # Target can only be in the upper half.
            lo = mid + 1
            continue
        # Neither smaller nor larger: this is a match.
        return mid
    return -1
# Autovivification recipe, see https://en.wikipedia.org/wiki/Autovivification
class Tree(dict):
    """Autovivifying nested dictionary.

    Indexing a key that does not exist creates a new empty instance of the
    same class, stores it under that key and returns it. Arbitrarily deep
    paths can therefore be assigned without building the intermediate
    levels first. Note that merely reading a missing key with ``tree[key]``
    inserts it.

    >>> # common names by class, order, genus, and type-species
    >>> common_names = Tree()
    >>> common_names['Mammalia']['Primates']['Homo']['H. sapiens'] = 'human being'
    >>> common_names
    {'Mammalia': {'Primates': {'Homo': {'H. sapiens': 'human being'}}}}

    >>> # Famous quotes by play, act, scene, and page
    >>> quotes = Tree()
    >>> quotes['Hamlet'][1][3][3] = 'This above all: to thine own self be true.'
    >>> quotes
    {'Hamlet': {1: {3: {3: 'This above all: to thine own self be true.'}}}}
    """

    def __missing__(self, key):
        """Create, store and return an empty subtree for a missing ``key``."""
        # type(self)() keeps subclasses autovivifying with their own type.
        value = self[key] = type(self)()
        return value
def try_parse_datetime(text):
    """Parse ``text`` as an ISO-8601 style timestamp, or return ``None``.

    The layouts are tried in order: with or without fractional seconds, and
    with or without a trailing ``Z``. The ``Z`` is matched as a literal
    character, so the returned datetime is always naive (no tzinfo).

    Args:
        text: Timestamp string such as ``'2019-04-17T09:02:16.00428Z'``.

    Returns:
        A ``datetime`` on success. ``None`` when no layout matches or when
        ``text`` is not a string; the rejected value is then logged at
        WARNING level.
    """
    # Local imports keep the helper usable on its own as a pasted snippet.
    import logging
    from datetime import datetime

    formats = (
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M:%S',
    )
    for fmt in formats:
        try:
            return datetime.strptime(text, fmt)
        except (ValueError, TypeError):
            # ValueError: layout mismatch. TypeError: text is not a string.
            continue
    logging.getLogger(__name__).warning('Could not parse datetime: %r', text)
    return None
# ----- filter byte zeros in S3 objects (for Messytables) -----
# Fetch the CSV object from S3 and hand it to Messytables with NUL bytes removed.
obj = bucket.Object(csv_path)
file_content = obj.get()["Body"]
# The code below filters out '\0' (byte 0x00) in the csv file.
# bytes.replace strips them in a single pass instead of a per-byte loop, and a
# BytesIO built from initial bytes starts at position 0, ready to be read.
buffer = BytesIO(file_content.read().replace(b'\0', b''))
table_set = CSVTableSet(buffer)  # send to Messytables
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment