Skip to content

Instantly share code, notes, and snippets.

@autrilla
Last active September 24, 2024 11:01
Show Gist options
  • Select an option

  • Save autrilla/c49c5be083590d99c2641a4c9812a00f to your computer and use it in GitHub Desktop.

Select an option

Save autrilla/c49c5be083590d99c2641a4c9812a00f to your computer and use it in GitHub Desktop.
google.py
import abc
import datetime
import decimal
import os
import typing
from typing import List
import beangulp
import camelot
import pandas as pd
from beancount.core import amount, data, flags, position
from beancount.core.number import D
from beangulp import mimetypes
from beangulp.testing import main
class PayslipLineProcessor(abc.ABC):
@abc.abstractmethod
def process(
self,
line: pd.DataFrame,
) -> List[data.Posting]:
pass
def extract_number(self, s: str) -> decimal.Decimal:
s = s.replace("CHF", "")
return typing.cast(decimal.Decimal, D(s))
class IncomeLineProcessor(PayslipLineProcessor):
def __init__(self, account):
self.account = account
def process(
self,
line: pd.DataFrame,
) -> List[data.Posting]:
current_period = line.iloc[-1]
prior_period = line.iloc[2]
postings = [
data.Posting(
self.account,
amount.Amount(-self.extract_number(current_period), "CHF"),
None,
None,
None,
None,
)
]
prior_period_amnt = self.extract_number(prior_period)
if prior_period_amnt != 0:
postings.append(
data.Posting(
self.account,
amount.Amount(-self.extract_number(prior_period), "CHF"),
None,
None,
None,
{"prior_period": "yes"},
)
)
return postings
class ExpenseLineProcessor(IncomeLineProcessor):
def __init__(self, account):
self.account = account
def process(
self,
line: pd.DataFrame,
) -> List[data.Posting]:
postings = super().process(line)
return [posting._replace(units=-posting.units) for posting in postings]
class MealAllowanceProcessor(PayslipLineProcessor):
def __init__(self, income_account, expense_account):
self.income_account = income_account
self.expense_account = expense_account
def process(
self,
line: pd.DataFrame,
) -> List[data.Posting]:
current_period = line.iloc[-1]
return [
data.Posting(
self.income_account,
amount.Amount(-self.extract_number(current_period), "CHF"),
None,
None,
None,
None,
),
data.Posting(
self.expense_account,
amount.Amount(self.extract_number(current_period), "CHF"),
None,
None,
None,
None,
),
]
class Pillar2Processor(PayslipLineProcessor):
"""
Processor for Pillar 2 contributions.
Reads the employee contribution line from the payslip and outputs employee
and employer contributions.
"""
def __init__(
self,
asset_account,
employee_contribution_account,
employer_contribution_account,
employee_contribution_permille=45,
employer_contribution_permille=85,
):
self.asset_account = asset_account
self.employee_contribution_account = employee_contribution_account
self.employer_contribution_account = employer_contribution_account
self.employee_contribution_permille = employee_contribution_permille
self.employer_contribution_permille = employer_contribution_permille
def pension_buyin(
self,
amnt: decimal.Decimal,
) -> data.Posting:
return data.Posting(
self.asset_account,
amount.Amount(amnt, "AXA_PILLAR2"),
position.Cost(
decimal.Decimal(1.0),
"CHF",
typing.cast(datetime.date, None),
None,
),
None,
None,
None,
)
def process(
self,
line: pd.DataFrame,
) -> List[data.Posting]:
employee_contribution = -self.extract_number(line.iloc[-1])
employer_contribution = round(
(
employee_contribution
* decimal.Decimal(self.employer_contribution_permille)
)
/ decimal.Decimal(self.employee_contribution_permille),
2,
)
postings = [
data.Posting(
self.employer_contribution_account,
amount.Amount(-employer_contribution, "CHF"),
None,
None,
None,
None,
),
self.pension_buyin(employer_contribution),
self.pension_buyin(employee_contribution),
]
return postings
PROCESSOR_MAPPINGS = {
"Monthly Salary": IncomeLineProcessor("Income:Google:Salary"),
"ER Health Insurance": IncomeLineProcessor(
"Income:Google:HealthInsuranceContribution"
),
"Peer Bonus": IncomeLineProcessor("Income:Google:PeerBonus"),
"Spot Bonus Gross": IncomeLineProcessor("Income:Google:SpotBonus"),
"Annual Bonus Gross": IncomeLineProcessor("Income:Google:AnnualBonus"),
"Education Subsidy Gross": IncomeLineProcessor("Income:Google:EducationSubsidy"),
"Meal Allowance Gross Up": IncomeLineProcessor(
"Income:Google:MealAllowanceGrossUp"
),
"Meal Allowance Net": MealAllowanceProcessor(
"Income:Google:MealAllowance", "Expenses:Google:Meals"
),
"WHT": ExpenseLineProcessor("Expenses:Tax:Withholding"),
"Swiss Social Security": ExpenseLineProcessor("Expenses:Tax:AHV"),
"Unemployment Insurance": ExpenseLineProcessor(
"Expenses:Tax:UnemploymentInsurance"
),
"Pension Contribution EE": Pillar2Processor(
"Assets:Pillar2",
"Expenses:PensionContribution",
"Income:Google:PensionContribution",
),
"MSSB Withholding Credit": IncomeLineProcessor("Expenses:Tax:Withholding"),
"Net Pay": ExpenseLineProcessor("Assets:Transfer"),
}
class Importer(beangulp.Importer):
def __init__(self, account):
self._account = account
@property
def name(self):
return "Google Payslips Importer"
def identify(self, filepath):
if "_Switzerland_Payslip_" not in filepath:
return False
mimetype, _ = mimetypes.guess_type(filepath)
if mimetype != "application/pdf":
return False
return True
def account(self, filepath: str) -> str:
return self._account
def filename(self, filepath) -> str:
return "Payslip.pdf"
def date(self, filepath: str) -> datetime.date:
filename = os.path.basename(filepath)
d = filename.split("_")[0]
return datetime.datetime.strptime(d, "%Y%m%d").date()
def extract(
self,
filepath: str,
existing: data.Entries,
) -> data.Entries:
table = camelot.read_pdf(
filepath,
flavor="stream",
table_areas=[
",".join(
(
"0", # x1, left side of the page
"600", # y1, should be enough to skip non-table parts
"10000", # x2, beyond the right side of the page
"40", # y2, should be enough to skip the page numbers
)
)
],
)[0].df
postings = []
for row in table.iloc:
processor = PROCESSOR_MAPPINGS.get(row[0], None)
if processor is not None:
postings.extend(processor.process(row))
transaction = data.Transaction( # type: ignore
data.new_metadata(__file__, 0),
self.date(filepath),
flags.FLAG_OKAY,
"self", # payee
"Google Payslip", # narration
data.EMPTY_SET, # type: ignore
data.EMPTY_SET, # type: ignore
postings,
)
return [
transaction,
]
if __name__ == "__main__":
main(Importer("Income:Google"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment