- Well, shortly before I ran out of time, I noticed that my results are different, so I would take care of capitalization (`.lower()`) and punctuation (`"`, `)`, `,`, etc.)
- Optimize for big files (don't load all the words into memory as a list; use a generator instead)
- Refactor the code, add docstrings, run the `mypy` checker, and write unit tests that check whether the output matches the expected result (`3 - result.txt`)
Last active
March 14, 2020 18:45
-
-
Save radzak/d7bd6e7993c252f51d21fb877339e641 to your computer and use it in GitHub Desktop.
Fineway - Data Coding Challenge
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter
from pathlib import Path
from typing import Iterable, List
def load_words(path: Path) -> List[str]:
    """Read the text file at *path* and return its words in file order.

    Each line is split on runs of whitespace, so blank lines, repeated
    spaces, and tabs never produce empty-string "words".

    Args:
        path: Location of the text file to read.

    Returns:
        Every whitespace-delimited token of the file, in order of appearance.
    """
    with path.open() as f:
        # str.split() with no argument collapses consecutive whitespace and
        # discards the trailing newline; split(' ') would instead emit ''
        # for every blank line and every doubled space.
        return [word for line in f for word in line.split()]
def count_words(words: Iterable[str]) -> Counter:
    """Tally how many times each word occurs in *words*.

    Args:
        words: Any iterable of words (a list, a generator, ...). It is
            consumed exactly once.

    Returns:
        A ``Counter`` mapping each distinct word to its number of occurrences.
    """
    return Counter(words)
def print_result(counter: Counter) -> None:
    """Print one ``word (count)`` line per entry, most frequent first.

    Args:
        counter: Word frequencies to report.
    """
    # most_common() with no argument yields every entry, ordered from the
    # highest count to the lowest.
    for word, occurrences in counter.most_common():
        print(f"{word} ({occurrences})")
def main() -> None:
    """Count the words in ``2 - Data.txt`` and print them by frequency.

    The input path is relative, so it is resolved against the current
    working directory.
    """
    path = Path("2 - Data.txt")
    words = load_words(path)
    counter = count_words(words)
    print_result(counter)
# Run the word count only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment