Last active
September 24, 2024 11:01
-
-
Save autrilla/c49c5be083590d99c2641a4c9812a00f to your computer and use it in GitHub Desktop.
google.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import abc | |
| import datetime | |
| import decimal | |
| import os | |
| import typing | |
| from typing import List | |
| import beangulp | |
| import camelot | |
| import pandas as pd | |
| from beancount.core import amount, data, flags, position | |
| from beancount.core.number import D | |
| from beangulp import mimetypes | |
| from beangulp.testing import main | |
| class PayslipLineProcessor(abc.ABC): | |
| @abc.abstractmethod | |
| def process( | |
| self, | |
| line: pd.DataFrame, | |
| ) -> List[data.Posting]: | |
| pass | |
| def extract_number(self, s: str) -> decimal.Decimal: | |
| s = s.replace("CHF", "") | |
| return typing.cast(decimal.Decimal, D(s)) | |
| class IncomeLineProcessor(PayslipLineProcessor): | |
| def __init__(self, account): | |
| self.account = account | |
| def process( | |
| self, | |
| line: pd.DataFrame, | |
| ) -> List[data.Posting]: | |
| current_period = line.iloc[-1] | |
| prior_period = line.iloc[2] | |
| postings = [ | |
| data.Posting( | |
| self.account, | |
| amount.Amount(-self.extract_number(current_period), "CHF"), | |
| None, | |
| None, | |
| None, | |
| None, | |
| ) | |
| ] | |
| prior_period_amnt = self.extract_number(prior_period) | |
| if prior_period_amnt != 0: | |
| postings.append( | |
| data.Posting( | |
| self.account, | |
| amount.Amount(-self.extract_number(prior_period), "CHF"), | |
| None, | |
| None, | |
| None, | |
| {"prior_period": "yes"}, | |
| ) | |
| ) | |
| return postings | |
| class ExpenseLineProcessor(IncomeLineProcessor): | |
| def __init__(self, account): | |
| self.account = account | |
| def process( | |
| self, | |
| line: pd.DataFrame, | |
| ) -> List[data.Posting]: | |
| postings = super().process(line) | |
| return [posting._replace(units=-posting.units) for posting in postings] | |
| class MealAllowanceProcessor(PayslipLineProcessor): | |
| def __init__(self, income_account, expense_account): | |
| self.income_account = income_account | |
| self.expense_account = expense_account | |
| def process( | |
| self, | |
| line: pd.DataFrame, | |
| ) -> List[data.Posting]: | |
| current_period = line.iloc[-1] | |
| return [ | |
| data.Posting( | |
| self.income_account, | |
| amount.Amount(-self.extract_number(current_period), "CHF"), | |
| None, | |
| None, | |
| None, | |
| None, | |
| ), | |
| data.Posting( | |
| self.expense_account, | |
| amount.Amount(self.extract_number(current_period), "CHF"), | |
| None, | |
| None, | |
| None, | |
| None, | |
| ), | |
| ] | |
| class Pillar2Processor(PayslipLineProcessor): | |
| """ | |
| Processor for Pillar 2 contributions. | |
| Reads the employee contribution line from the payslip and outputs employee | |
| and employer contributions. | |
| """ | |
| def __init__( | |
| self, | |
| asset_account, | |
| employee_contribution_account, | |
| employer_contribution_account, | |
| employee_contribution_permille=45, | |
| employer_contribution_permille=85, | |
| ): | |
| self.asset_account = asset_account | |
| self.employee_contribution_account = employee_contribution_account | |
| self.employer_contribution_account = employer_contribution_account | |
| self.employee_contribution_permille = employee_contribution_permille | |
| self.employer_contribution_permille = employer_contribution_permille | |
| def pension_buyin( | |
| self, | |
| amnt: decimal.Decimal, | |
| ) -> data.Posting: | |
| return data.Posting( | |
| self.asset_account, | |
| amount.Amount(amnt, "AXA_PILLAR2"), | |
| position.Cost( | |
| decimal.Decimal(1.0), | |
| "CHF", | |
| typing.cast(datetime.date, None), | |
| None, | |
| ), | |
| None, | |
| None, | |
| None, | |
| ) | |
| def process( | |
| self, | |
| line: pd.DataFrame, | |
| ) -> List[data.Posting]: | |
| employee_contribution = -self.extract_number(line.iloc[-1]) | |
| employer_contribution = round( | |
| ( | |
| employee_contribution | |
| * decimal.Decimal(self.employer_contribution_permille) | |
| ) | |
| / decimal.Decimal(self.employee_contribution_permille), | |
| 2, | |
| ) | |
| postings = [ | |
| data.Posting( | |
| self.employer_contribution_account, | |
| amount.Amount(-employer_contribution, "CHF"), | |
| None, | |
| None, | |
| None, | |
| None, | |
| ), | |
| self.pension_buyin(employer_contribution), | |
| self.pension_buyin(employee_contribution), | |
| ] | |
| return postings | |
| PROCESSOR_MAPPINGS = { | |
| "Monthly Salary": IncomeLineProcessor("Income:Google:Salary"), | |
| "ER Health Insurance": IncomeLineProcessor( | |
| "Income:Google:HealthInsuranceContribution" | |
| ), | |
| "Peer Bonus": IncomeLineProcessor("Income:Google:PeerBonus"), | |
| "Spot Bonus Gross": IncomeLineProcessor("Income:Google:SpotBonus"), | |
| "Annual Bonus Gross": IncomeLineProcessor("Income:Google:AnnualBonus"), | |
| "Education Subsidy Gross": IncomeLineProcessor("Income:Google:EducationSubsidy"), | |
| "Meal Allowance Gross Up": IncomeLineProcessor( | |
| "Income:Google:MealAllowanceGrossUp" | |
| ), | |
| "Meal Allowance Net": MealAllowanceProcessor( | |
| "Income:Google:MealAllowance", "Expenses:Google:Meals" | |
| ), | |
| "WHT": ExpenseLineProcessor("Expenses:Tax:Withholding"), | |
| "Swiss Social Security": ExpenseLineProcessor("Expenses:Tax:AHV"), | |
| "Unemployment Insurance": ExpenseLineProcessor( | |
| "Expenses:Tax:UnemploymentInsurance" | |
| ), | |
| "Pension Contribution EE": Pillar2Processor( | |
| "Assets:Pillar2", | |
| "Expenses:PensionContribution", | |
| "Income:Google:PensionContribution", | |
| ), | |
| "MSSB Withholding Credit": IncomeLineProcessor("Expenses:Tax:Withholding"), | |
| "Net Pay": ExpenseLineProcessor("Assets:Transfer"), | |
| } | |
| class Importer(beangulp.Importer): | |
| def __init__(self, account): | |
| self._account = account | |
| @property | |
| def name(self): | |
| return "Google Payslips Importer" | |
| def identify(self, filepath): | |
| if "_Switzerland_Payslip_" not in filepath: | |
| return False | |
| mimetype, _ = mimetypes.guess_type(filepath) | |
| if mimetype != "application/pdf": | |
| return False | |
| return True | |
| def account(self, filepath: str) -> str: | |
| return self._account | |
| def filename(self, filepath) -> str: | |
| return "Payslip.pdf" | |
| def date(self, filepath: str) -> datetime.date: | |
| filename = os.path.basename(filepath) | |
| d = filename.split("_")[0] | |
| return datetime.datetime.strptime(d, "%Y%m%d").date() | |
| def extract( | |
| self, | |
| filepath: str, | |
| existing: data.Entries, | |
| ) -> data.Entries: | |
| table = camelot.read_pdf( | |
| filepath, | |
| flavor="stream", | |
| table_areas=[ | |
| ",".join( | |
| ( | |
| "0", # x1, left side of the page | |
| "600", # y1, should be enough to skip non-table parts | |
| "10000", # x2, beyond the right side of the page | |
| "40", # y2, should be enough to skip the page numbers | |
| ) | |
| ) | |
| ], | |
| )[0].df | |
| postings = [] | |
| for row in table.iloc: | |
| processor = PROCESSOR_MAPPINGS.get(row[0], None) | |
| if processor is not None: | |
| postings.extend(processor.process(row)) | |
| transaction = data.Transaction( # type: ignore | |
| data.new_metadata(__file__, 0), | |
| self.date(filepath), | |
| flags.FLAG_OKAY, | |
| "self", # payee | |
| "Google Payslip", # narration | |
| data.EMPTY_SET, # type: ignore | |
| data.EMPTY_SET, # type: ignore | |
| postings, | |
| ) | |
| return [ | |
| transaction, | |
| ] | |
| if __name__ == "__main__": | |
| main(Importer("Income:Google")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment