Last active
August 31, 2020 10:15
-
-
Save egorsmkv/712436d9d82f4451973ae7622f541deb to your computer and use it in GitHub Desktop.
An algorithm to search longest date intervals among a list of dates in Python (currently searches longest date intervals in months)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime | |
from typing import List | |
def solve(date_items: List[datetime]): | |
""" | |
Get the longest date interval from the date_items list. | |
:param date_items: | |
:return: | |
""" | |
# 1) Sort the items | |
sorted_date_items = sorted(date_items) | |
# 2) Convert our items to a dict as "[item_year]_[item_month]__[nth_of_year] => item" | |
nth_dates = {} | |
for item in sorted_date_items: | |
new_year_day = datetime(year=item.year, month=1, day=1) | |
nth_of_year = (item - new_year_day).days + 1 | |
key = f'{item.year}_{item.month}_{nth_of_year}' | |
nth_dates[key] = item | |
# 3) Iterate over items | |
count_map = {} | |
item_start = None | |
counter = 1 | |
for key, item in nth_dates.items(): | |
current_year, current_month, current_nth_day = key.split('_') | |
current_nth_day = int(current_nth_day) | |
next_key = f'{current_year}_{current_month}_{current_nth_day + 1}' | |
if next_key in nth_dates: | |
if item_start is None: | |
item_start = item | |
counter += 1 | |
else: | |
if item_start is None: | |
item_start = item | |
if counter >= 2: | |
# add to our map | |
map_key = f'{current_year}_{current_month}_{counter}' | |
count_map[map_key] = {'item_start': item_start, 'item_end': item} | |
# reset tmp variables | |
item_start = None | |
counter = 1 | |
# 4) Transform elements to a list with its counters | |
by_counter = [] | |
for key, value in count_map.items(): | |
_, _, counter = key.split('_') | |
counter = int(counter) | |
by_counter.append({ | |
'counter': counter, | |
'value': value, | |
}) | |
# 5) Sort the list by element's counters | |
by_counter_sorted = sorted(by_counter, key=lambda it: it['counter']) | |
best_match = by_counter_sorted[-1] | |
result = { | |
'date_interval': best_match['value'], | |
'consecutive_days': best_match['counter'] | |
} | |
return {'count_map': count_map, 'result': result} | |
if __name__ == '__main__': | |
our_dates = [ | |
datetime(2019, 2, 19), | |
datetime(2019, 2, 21), | |
datetime(2019, 2, 22), | |
datetime(2019, 4, 22), | |
datetime(2019, 4, 23), | |
datetime(2019, 4, 24), | |
datetime(2020, 4, 23), | |
datetime(2020, 4, 24), | |
datetime(2020, 6, 26), | |
datetime(2020, 6, 27), | |
datetime(2020, 6, 28), | |
datetime(2020, 6, 29), | |
datetime(2020, 6, 30), | |
datetime(2020, 6, 1), | |
datetime(2020, 6, 2), | |
datetime(2020, 6, 3), | |
datetime(2020, 6, 4), | |
datetime(2020, 6, 5), | |
datetime(2020, 6, 6), | |
datetime(2020, 6, 7), | |
datetime(2020, 7, 11), | |
datetime(2020, 7, 12), | |
datetime(2020, 7, 13), | |
] | |
longest_interval = solve(our_dates) | |
result_count_map = longest_interval['count_map'] | |
for k, v in result_count_map.items(): | |
item_s = v['item_start'].strftime('%Y-%m-%d') | |
item_e = v['item_end'].strftime('%Y-%m-%d') | |
print(k, v) | |
print(item_s, item_e) | |
print() | |
print('---') | |
print() | |
res = longest_interval['result'] | |
print(res) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> python algo.py | |
2019_2_2 {'item_start': datetime.datetime(2019, 2, 19, 0, 0), 'item_end': datetime.datetime(2019, 2, 22, 0, 0)} | |
2019-02-19 2019-02-22 | |
2019_4_3 {'item_start': datetime.datetime(2019, 4, 22, 0, 0), 'item_end': datetime.datetime(2019, 4, 24, 0, 0)} | |
2019-04-22 2019-04-24 | |
2020_4_2 {'item_start': datetime.datetime(2020, 4, 23, 0, 0), 'item_end': datetime.datetime(2020, 4, 24, 0, 0)} | |
2020-04-23 2020-04-24 | |
2020_6_7 {'item_start': datetime.datetime(2020, 6, 1, 0, 0), 'item_end': datetime.datetime(2020, 6, 7, 0, 0)} | |
2020-06-01 2020-06-07 | |
2020_6_5 {'item_start': datetime.datetime(2020, 6, 26, 0, 0), 'item_end': datetime.datetime(2020, 6, 30, 0, 0)} | |
2020-06-26 2020-06-30 | |
2020_7_3 {'item_start': datetime.datetime(2020, 7, 11, 0, 0), 'item_end': datetime.datetime(2020, 7, 13, 0, 0)} | |
2020-07-11 2020-07-13 | |
--- | |
{'date_interval': {'item_start': datetime.datetime(2020, 6, 1, 0, 0), 'item_end': datetime.datetime(2020, 6, 7, 0, 0)}, 'consecutive_days': 7} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment