autrilla · September 24, 2024 11:01
diff --git a/Google.py b/Google.py
 import abc
 import datetime
 import decimal
 import os
 import typing
 from typing import List

 import beangulp
 import camelot
 import pandas as pd
 from beancount.core import amount, data, flags, position
 from beancount.core.number import D
 from beangulp import mimetypes
 from beangulp.testing import main


 class PayslipLineProcessor(abc.ABC):
    @abc.abstractmethod
    def process(
        self,
        line: pd.DataFrame,
    ) -> List[data.Posting]:
        pass

    def extract_number(self, s: str) -> decimal.Decimal:
        s = s.replace("CHF", "")
        return typing.cast(decimal.Decimal, D(s))


 class IncomeLineProcessor(PayslipLineProcessor):
    def __init__(self, account):
        self.account = account

    def process(
        self,
        line: pd.DataFrame,
    ) -> List[data.Posting]:
        current_period = line.iloc[-1]
        prior_period = line.iloc[2]
        postings = [
            data.Posting(
                self.account,
                amount.Amount(-self.extract_number(current_period), "CHF"),
                None,
                None,
                None,
                None,
            )
        ]
        prior_period_amnt = self.extract_number(prior_period)
        if prior_period_amnt != 0:
            postings.append(
                data.Posting(
                    self.account,
                    amount.Amount(-self.extract_number(prior_period), "CHF"),
                    None,
                    None,
                    None,
                    {"prior_period": "yes"},
                )
            )

        return postings


 class ExpenseLineProcessor(IncomeLineProcessor):
    def __init__(self, account):
        self.account = account

    def process(
        self,
        line: pd.DataFrame,
    ) -> List[data.Posting]:
        postings = super().process(line)
        return [posting._replace(units=-posting.units) for posting in postings]


 class MealAllowanceProcessor(PayslipLineProcessor):
    def __init__(self, income_account, expense_account):
        self.income_account = income_account
        self.expense_account = expense_account

    def process(
        self,
        line: pd.DataFrame,
    ) -> List[data.Posting]:
        current_period = line.iloc[-1]
        return [
            data.Posting(
                self.income_account,
                amount.Amount(-self.extract_number(current_period), "CHF"),
                None,
                None,
                None,
                None,
            ),
            data.Posting(
                self.expense_account,
                amount.Amount(self.extract_number(current_period), "CHF"),
                None,
                None,
                None,
                None,
            ),
        ]


 class Pillar2Processor(PayslipLineProcessor):
    """
    Processor for Pillar 2 contributions.

    Reads the employee contribution line from the payslip and outputs employee
    and employer contributions.
    """

    def __init__(
        self,
        asset_account,
        employee_contribution_account,
        employer_contribution_account,
        employee_contribution_permille=45,
        employer_contribution_permille=85,
    ):
        self.asset_account = asset_account
        self.employee_contribution_account = employee_contribution_account
        self.employer_contribution_account = employer_contribution_account
        self.employee_contribution_permille = employee_contribution_permille
        self.employer_contribution_permille = employer_contribution_permille

    def pension_buyin(
        self,
        amnt: decimal.Decimal,
    ) -> data.Posting:
        return data.Posting(
            self.asset_account,
            amount.Amount(amnt, "AXA_PILLAR2"),
            position.Cost(
                decimal.Decimal(1.0),
                "CHF",
                typing.cast(datetime.date, None),
                None,
            ),
            None,
            None,
            None,
        )

    def process(
        self,
        line: pd.DataFrame,
    ) -> List[data.Posting]:
        employee_contribution = -self.extract_number(line.iloc[-1])
        employer_contribution = round(
            (
                employee_contribution
                * decimal.Decimal(self.employer_contribution_permille)
            )
            / decimal.Decimal(self.employee_contribution_permille),
            2,
        )
        postings = [
            data.Posting(
                self.employer_contribution_account,
                amount.Amount(-employer_contribution, "CHF"),
                None,
                None,
                None,
                None,
            ),
            self.pension_buyin(employer_contribution),
            self.pension_buyin(employee_contribution),
        ]
        return postings


 PROCESSOR_MAPPINGS = {
    "Monthly Salary": IncomeLineProcessor("Income:Google:Salary"),
    "ER Health Insurance": IncomeLineProcessor(
        "Income:Google:HealthInsuranceContribution"
    ),
    "Peer Bonus": IncomeLineProcessor("Income:Google:PeerBonus"),
    "Spot Bonus Gross": IncomeLineProcessor("Income:Google:SpotBonus"),
    "Annual Bonus Gross": IncomeLineProcessor("Income:Google:AnnualBonus"),
    "Education Subsidy Gross": IncomeLineProcessor("Income:Google:EducationSubsidy"),
    "Meal Allowance Gross Up": IncomeLineProcessor(
        "Income:Google:MealAllowanceGrossUp"
    ),
    "Meal Allowance Net": MealAllowanceProcessor(
        "Income:Google:MealAllowance", "Expenses:Google:Meals"
    ),
    "WHT": ExpenseLineProcessor("Expenses:Tax:Withholding"),
    "Swiss Social Security": ExpenseLineProcessor("Expenses:Tax:AHV"),
    "Unemployment Insurance": ExpenseLineProcessor(
        "Expenses:Tax:UnemploymentInsurance"
    ),
    "Pension Contribution EE": Pillar2Processor(
        "Assets:Pillar2",
        "Expenses:PensionContribution",
        "Income:Google:PensionContribution",
    ),
    "MSSB Withholding Credit": IncomeLineProcessor("Expenses:Tax:Withholding"),
    "Net Pay": ExpenseLineProcessor("Assets:Transfer"),
 }


 class Importer(beangulp.Importer):

    def __init__(self, account):
        self._account = account

    @property
    def name(self):
        return "Google Payslips Importer"

    def identify(self, filepath):
        if "_Switzerland_Payslip_" not in filepath:
            return False
        mimetype, _ = mimetypes.guess_type(filepath)
        if mimetype != "application/pdf":
            return False
        return True

    def account(self, filepath: str) -> str:
        return self._account

    def filename(self, filepath) -> str:
        return "Payslip.pdf"

    def date(self, filepath: str) -> datetime.date:
        filename = os.path.basename(filepath)
        d = filename.split("_")[0]
        return datetime.datetime.strptime(d, "%Y%m%d").date()

    def extract(
        self,
        filepath: str,
        existing: data.Entries,
    ) -> data.Entries:
        table = camelot.read_pdf(
            filepath,
            flavor="stream",
            table_areas=[
                ",".join(
                    (
                        "0",  # x1, left side of the page
                        "600",  # y1, should be enough to skip non-table parts
                        "10000",  # x2, beyond the right side of the page
                        "40",  # y2, should be enough to skip the page numbers
                    )
                )
            ],
        )[0].df
        postings = []
        for row in table.iloc:
            processor = PROCESSOR_MAPPINGS.get(row[0], None)
            if processor is not None:
                postings.extend(processor.process(row))
        transaction = data.Transaction(  # type: ignore
            data.new_metadata(__file__, 0),
            self.date(filepath),
            flags.FLAG_OKAY,
            "self",  # payee
            "Google Payslip",  # narration
            data.EMPTY_SET,  # type: ignore
            data.EMPTY_SET,  # type: ignore
            postings,
        )
        return [
            transaction,
        ]


 if __name__ == "__main__":
    main(Importer("Income:Google"))
	import abc
	import datetime
	import decimal
	import os
	import typing
	from typing import List

	import beangulp
	import camelot
	import pandas as pd
	from beancount.core import amount, data, flags, position
	from beancount.core.number import D
	from beangulp import mimetypes
	from beangulp.testing import main


	class PayslipLineProcessor(abc.ABC):
	@abc.abstractmethod
	def process(
	self,
	line: pd.DataFrame,
	) -> List[data.Posting]:
	pass

	def extract_number(self, s: str) -> decimal.Decimal:
	s = s.replace("CHF", "")
	return typing.cast(decimal.Decimal, D(s))


	class IncomeLineProcessor(PayslipLineProcessor):
	def __init__(self, account):
	self.account = account

	def process(
	self,
	line: pd.DataFrame,
	) -> List[data.Posting]:
	current_period = line.iloc[-1]
	prior_period = line.iloc[2]
	postings = [
	data.Posting(
	self.account,
	amount.Amount(-self.extract_number(current_period), "CHF"),
	None,
	None,
	None,
	None,
	)
	]
	prior_period_amnt = self.extract_number(prior_period)
	if prior_period_amnt != 0:
	postings.append(
	data.Posting(
	self.account,
	amount.Amount(-self.extract_number(prior_period), "CHF"),
	None,
	None,
	None,
	{"prior_period": "yes"},
	)
	)

	return postings


	class ExpenseLineProcessor(IncomeLineProcessor):
	def __init__(self, account):
	self.account = account

	def process(
	self,
	line: pd.DataFrame,
	) -> List[data.Posting]:
	postings = super().process(line)
	return [posting._replace(units=-posting.units) for posting in postings]


	class MealAllowanceProcessor(PayslipLineProcessor):
	def __init__(self, income_account, expense_account):
	self.income_account = income_account
	self.expense_account = expense_account

	def process(
	self,
	line: pd.DataFrame,
	) -> List[data.Posting]:
	current_period = line.iloc[-1]
	return [
	data.Posting(
	self.income_account,
	amount.Amount(-self.extract_number(current_period), "CHF"),
	None,
	None,
	None,
	None,
	),
	data.Posting(
	self.expense_account,
	amount.Amount(self.extract_number(current_period), "CHF"),
	None,
	None,
	None,
	None,
	),
	]


	class Pillar2Processor(PayslipLineProcessor):
	"""
	Processor for Pillar 2 contributions.

	Reads the employee contribution line from the payslip and outputs employee
	and employer contributions.
	"""

	def __init__(
	self,
	asset_account,
	employee_contribution_account,
	employer_contribution_account,
	employee_contribution_permille=45,
	employer_contribution_permille=85,
	):
	self.asset_account = asset_account
	self.employee_contribution_account = employee_contribution_account
	self.employer_contribution_account = employer_contribution_account
	self.employee_contribution_permille = employee_contribution_permille
	self.employer_contribution_permille = employer_contribution_permille

	def pension_buyin(
	self,
	amnt: decimal.Decimal,
	) -> data.Posting:
	return data.Posting(
	self.asset_account,
	amount.Amount(amnt, "AXA_PILLAR2"),
	position.Cost(
	decimal.Decimal(1.0),
	"CHF",
	typing.cast(datetime.date, None),
	None,
	),
	None,
	None,
	None,
	)

	def process(
	self,
	line: pd.DataFrame,
	) -> List[data.Posting]:
	employee_contribution = -self.extract_number(line.iloc[-1])
	employer_contribution = round(
	(
	employee_contribution
	* decimal.Decimal(self.employer_contribution_permille)
	)
	/ decimal.Decimal(self.employee_contribution_permille),
	2,
	)
	postings = [
	data.Posting(
	self.employer_contribution_account,
	amount.Amount(-employer_contribution, "CHF"),
	None,
	None,
	None,
	None,
	),
	self.pension_buyin(employer_contribution),
	self.pension_buyin(employee_contribution),
	]
	return postings


	PROCESSOR_MAPPINGS = {
	"Monthly Salary": IncomeLineProcessor("Income:Google:Salary"),
	"ER Health Insurance": IncomeLineProcessor(
	"Income:Google:HealthInsuranceContribution"
	),
	"Peer Bonus": IncomeLineProcessor("Income:Google:PeerBonus"),
	"Spot Bonus Gross": IncomeLineProcessor("Income:Google:SpotBonus"),
	"Annual Bonus Gross": IncomeLineProcessor("Income:Google:AnnualBonus"),
	"Education Subsidy Gross": IncomeLineProcessor("Income:Google:EducationSubsidy"),
	"Meal Allowance Gross Up": IncomeLineProcessor(
	"Income:Google:MealAllowanceGrossUp"
	),
	"Meal Allowance Net": MealAllowanceProcessor(
	"Income:Google:MealAllowance", "Expenses:Google:Meals"
	),
	"WHT": ExpenseLineProcessor("Expenses:Tax:Withholding"),
	"Swiss Social Security": ExpenseLineProcessor("Expenses:Tax:AHV"),
	"Unemployment Insurance": ExpenseLineProcessor(
	"Expenses:Tax:UnemploymentInsurance"
	),
	"Pension Contribution EE": Pillar2Processor(
	"Assets:Pillar2",
	"Expenses:PensionContribution",
	"Income:Google:PensionContribution",
	),
	"MSSB Withholding Credit": IncomeLineProcessor("Expenses:Tax:Withholding"),
	"Net Pay": ExpenseLineProcessor("Assets:Transfer"),
	}


	class Importer(beangulp.Importer):

	def __init__(self, account):
	self._account = account

	@property
	def name(self):
	return "Google Payslips Importer"

	def identify(self, filepath):
	if "_Switzerland_Payslip_" not in filepath:
	return False
	mimetype, _ = mimetypes.guess_type(filepath)
	if mimetype != "application/pdf":
	return False
	return True

	def account(self, filepath: str) -> str:
	return self._account

	def filename(self, filepath) -> str:
	return "Payslip.pdf"

	def date(self, filepath: str) -> datetime.date:
	filename = os.path.basename(filepath)
	d = filename.split("_")[0]
	return datetime.datetime.strptime(d, "%Y%m%d").date()

	def extract(
	self,
	filepath: str,
	existing: data.Entries,
	) -> data.Entries:
	table = camelot.read_pdf(
	filepath,
	flavor="stream",
	table_areas=[
	",".join(
	(
	"0", # x1, left side of the page
	"600", # y1, should be enough to skip non-table parts
	"10000", # x2, beyond the right side of the page
	"40", # y2, should be enough to skip the page numbers
	)
	)
	],
	)[0].df
	postings = []
	for row in table.iloc:
	processor = PROCESSOR_MAPPINGS.get(row[0], None)
	if processor is not None:
	postings.extend(processor.process(row))
	transaction = data.Transaction( # type: ignore
	data.new_metadata(__file__, 0),
	self.date(filepath),
	flags.FLAG_OKAY,
	"self", # payee
	"Google Payslip", # narration
	data.EMPTY_SET, # type: ignore
	data.EMPTY_SET, # type: ignore
	postings,
	)
	return [
	transaction,
	]


	if __name__ == "__main__":
	main(Importer("Income:Google"))
No results found