Last active
May 8, 2024 13:28
-
-
Save Intelrunner/654df57f0fb27818001ccebb7e4e3b75 to your computer and use it in GitHub Desktop.
This does a quick rip-and-save of all tables inside a PDF. It is not pretty, but it works. Ignore the errors about jpype.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
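Instructions
- Download this file to a folder and save it as main.py
- > Install the dependency: run "pip3 install tabula-py" (a Java runtime is required)
- > Optional: run "pip3 freeze > requirements.txt" to record the versions, so others can run "pip3 install -r requirements.txt"
- > Run "python3 main.py"
- Enjoy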
""" | |
import os

import tabula
from tabula import convert_into, read_pdf

# Folder (relative to the current working directory) that receives the CSVs.
OUTPUT_DIR = "output"

# Ask for the PDF to process.
file_name = input("Enter the file name or relative path: ")

# Extract from every page. The result is iterated below as a sequence of
# DataFrame-like objects, each written out with ``to_csv``.
# NOTE(review): multiple_tables=False presumably makes tabula-py merge the
# extraction into a single table, so only table_0.csv would be written --
# confirm this is intended if one CSV per table is the goal.
tables = tabula.read_pdf(
    file_name, pages="all", encoding='utf-8', multiple_tables=False
)

# Create the output folder, reporting whether it was already present.
# Attempting the creation directly avoids the check-then-create race of
# testing os.path.exists() first.
try:
    os.makedirs(OUTPUT_DIR)
except FileExistsError:
    print("Directory already exists")
else:
    print("Directory created")

# Write each extracted table to output/table_<index>.csv (without the
# DataFrame index column); an existing file of the same name is overwritten.
for i, table in enumerate(tables):
    table.to_csv(os.path.join(OUTPUT_DIR, f"table_{i}.csv"), index=False)
    print(f"Table {i} saved to CSV")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment