Last active
December 14, 2023 08:59
-
-
Save MaxHalford/9fba0c2d6800d0f0643902bf57b99780 to your computer and use it in GitHub Desktop.
Decomposition without and with gaps
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Ratio decomposition" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Without gaps" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Some dummy data." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>period</th>\n", | |
" <th>group</th>\n", | |
" <th>volume</th>\n", | |
" <th>rate</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1999</td>\n", | |
" <td>A</td>\n", | |
" <td>1000</td>\n", | |
" <td>3.7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1999</td>\n", | |
" <td>B</td>\n", | |
" <td>900</td>\n", | |
" <td>3.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1999</td>\n", | |
" <td>C</td>\n", | |
" <td>1200</td>\n", | |
" <td>4.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2000</td>\n", | |
" <td>A</td>\n", | |
" <td>1000</td>\n", | |
" <td>6.4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2000</td>\n", | |
" <td>B</td>\n", | |
" <td>900</td>\n", | |
" <td>3.9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>2000</td>\n", | |
" <td>C</td>\n", | |
" <td>800</td>\n", | |
" <td>4.6</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" period group volume rate\n", | |
"0 1999 A 1000 3.7\n", | |
"1 1999 B 900 3.5\n", | |
"2 1999 C 1200 4.5\n", | |
"3 2000 A 1000 6.4\n", | |
"4 2000 B 900 3.9\n", | |
"5 2000 C 800 4.6" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(data := pd.DataFrame(\n", | |
" [\n", | |
" (1999, 'A', 1000, 3.7),\n", | |
" (1999, 'B', 900, 3.5),\n", | |
" (1999, 'C', 1200, 4.5),\n", | |
" (2000, 'A', 1000, 6.4),\n", | |
" (2000, 'B', 900, 3.9),\n", | |
" (2000, 'C', 800, 4.6),\n", | |
" ],\n", | |
" columns=['period', 'group', 'volume', 'rate']\n", | |
"))\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Aggregated rates at each period:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"period\n", | |
"1999 3.951613\n", | |
"2000 5.033333\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(period_rates := (\n", | |
" data\n", | |
" .groupby('period')\n", | |
" .apply(lambda x: x['rate'] * x['volume'] / x['volume'].sum()).groupby('period')\n", | |
" .sum()\n", | |
"))\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Rate difference between periods to explain:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"period\n", | |
"1999 NaN\n", | |
"2000 1.08172\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"period_rates.diff()\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"We want to explain the difference through the inner and mix contributions of each group.\n", | |
"\n", | |
"Implementation available here: https://maxhalford.github.io/blog/kpi-evolution-decomposition/#implementation-1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>period</th>\n", | |
" <th>group</th>\n", | |
" <th>volume</th>\n", | |
" <th>rate</th>\n", | |
" <th>total</th>\n", | |
" <th>period_rate</th>\n", | |
" <th>share</th>\n", | |
" <th>rate_lag</th>\n", | |
" <th>share_lag</th>\n", | |
" <th>period_rate_lag</th>\n", | |
" <th>inner</th>\n", | |
" <th>mix</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1999</td>\n", | |
" <td>A</td>\n", | |
" <td>1000</td>\n", | |
" <td>3.7</td>\n", | |
" <td>3700.0</td>\n", | |
" <td>3.951613</td>\n", | |
" <td>0.322581</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1999</td>\n", | |
" <td>B</td>\n", | |
" <td>900</td>\n", | |
" <td>3.5</td>\n", | |
" <td>3150.0</td>\n", | |
" <td>3.951613</td>\n", | |
" <td>0.290323</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1999</td>\n", | |
" <td>C</td>\n", | |
" <td>1200</td>\n", | |
" <td>4.5</td>\n", | |
" <td>5400.0</td>\n", | |
" <td>3.951613</td>\n", | |
" <td>0.387097</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2000</td>\n", | |
" <td>A</td>\n", | |
" <td>1000</td>\n", | |
" <td>6.4</td>\n", | |
" <td>6400.0</td>\n", | |
" <td>5.033333</td>\n", | |
" <td>0.370370</td>\n", | |
" <td>3.7</td>\n", | |
" <td>0.322581</td>\n", | |
" <td>3.951613</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.176822</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2000</td>\n", | |
" <td>B</td>\n", | |
" <td>900</td>\n", | |
" <td>3.9</td>\n", | |
" <td>3510.0</td>\n", | |
" <td>5.033333</td>\n", | |
" <td>0.333333</td>\n", | |
" <td>3.5</td>\n", | |
" <td>0.290323</td>\n", | |
" <td>3.951613</td>\n", | |
" <td>0.133333</td>\n", | |
" <td>0.150538</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>2000</td>\n", | |
" <td>C</td>\n", | |
" <td>800</td>\n", | |
" <td>4.6</td>\n", | |
" <td>3680.0</td>\n", | |
" <td>5.033333</td>\n", | |
" <td>0.296296</td>\n", | |
" <td>4.5</td>\n", | |
" <td>0.387097</td>\n", | |
" <td>3.951613</td>\n", | |
" <td>0.029630</td>\n", | |
" <td>-0.408602</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" period group volume rate total period_rate share rate_lag \\\n", | |
"0 1999 A 1000 3.7 3700.0 3.951613 0.322581 NaN \n", | |
"1 1999 B 900 3.5 3150.0 3.951613 0.290323 NaN \n", | |
"2 1999 C 1200 4.5 5400.0 3.951613 0.387097 NaN \n", | |
"3 2000 A 1000 6.4 6400.0 5.033333 0.370370 3.7 \n", | |
"4 2000 B 900 3.9 3510.0 5.033333 0.333333 3.5 \n", | |
"5 2000 C 800 4.6 3680.0 5.033333 0.296296 4.5 \n", | |
"\n", | |
" share_lag period_rate_lag inner mix \n", | |
"0 NaN NaN NaN NaN \n", | |
"1 NaN NaN NaN NaN \n", | |
"2 NaN NaN NaN NaN \n", | |
"3 0.322581 3.951613 1.000000 0.176822 \n", | |
"4 0.290323 3.951613 0.133333 0.150538 \n", | |
"5 0.387097 3.951613 0.029630 -0.408602 " | |
] | |
}, | |
"execution_count": 31, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"decomp = data.copy()\n", | |
"decomp['total'] = decomp.eval('volume * rate')\n", | |
"decomp['period_rate'] = (\n", | |
" decomp.groupby('period')['total'].transform('sum') /\n", | |
" decomp.groupby('period')['volume'].transform('sum')\n", | |
")\n", | |
"decomp['share'] = (\n", | |
" decomp['volume'] /\n", | |
" decomp.groupby('period')['volume'].transform('sum')\n", | |
")\n", | |
"decomp['rate_lag'] = decomp.groupby('group')['rate'].shift(1)\n", | |
"decomp['share_lag'] = decomp.groupby('group')['share'].shift(1)\n", | |
"decomp['period_rate_lag'] = decomp.groupby('group')['period_rate'].shift(1)\n", | |
"decomp['inner'] = decomp.eval('share * (rate - rate_lag)')\n", | |
"decomp['mix'] = decomp.eval('(share - share_lag) * rate_lag')\n", | |
"decomp" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"We can verify the contributions sum up to the rate difference:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Inner: 1.163\n", | |
"Mix: -0.081\n" | |
] | |
} | |
], | |
"source": [ | |
"import math\n", | |
"\n", | |
"assert math.isclose(decomp['inner'].sum() + decomp['mix'].sum(), period_rates.diff().sum())\n", | |
"print(\n", | |
" f'Inner: {decomp[\"inner\"].sum():.3f}\\n'\n", | |
" f'Mix: {decomp[\"mix\"].sum():.3f}'\n", | |
")\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## With gaps" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>period</th>\n", | |
" <th>group</th>\n", | |
" <th>volume</th>\n", | |
" <th>rate</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1999</td>\n", | |
" <td>A</td>\n", | |
" <td>1000</td>\n", | |
" <td>3.7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1999</td>\n", | |
" <td>C</td>\n", | |
" <td>1200</td>\n", | |
" <td>4.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2000</td>\n", | |
" <td>B</td>\n", | |
" <td>900</td>\n", | |
" <td>3.9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2000</td>\n", | |
" <td>C</td>\n", | |
" <td>800</td>\n", | |
" <td>4.6</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" period group volume rate\n", | |
"0 1999 A 1000 3.7\n", | |
"1 1999 C 1200 4.5\n", | |
"2 2000 B 900 3.9\n", | |
"3 2000 C 800 4.6" | |
] | |
}, | |
"execution_count": 33, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(data := pd.DataFrame(\n", | |
" [\n", | |
" (1999, 'A', 1000, 3.7),\n", | |
" (1999, 'C', 1200, 4.5),\n", | |
" (2000, 'B', 900, 3.9),\n", | |
" (2000, 'C', 800, 4.6),\n", | |
" ],\n", | |
" columns=['period', 'group', 'volume', 'rate']\n", | |
"))\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The trick is to add a row for each period where the group is not present." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>period</th>\n", | |
" <th>group</th>\n", | |
" <th>volume</th>\n", | |
" <th>rate</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1999</td>\n", | |
" <td>A</td>\n", | |
" <td>1000.0</td>\n", | |
" <td>3.7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1999</td>\n", | |
" <td>C</td>\n", | |
" <td>1200.0</td>\n", | |
" <td>4.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1999</td>\n", | |
" <td>B</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2000</td>\n", | |
" <td>A</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2000</td>\n", | |
" <td>C</td>\n", | |
" <td>800.0</td>\n", | |
" <td>4.6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>2000</td>\n", | |
" <td>B</td>\n", | |
" <td>900.0</td>\n", | |
" <td>3.9</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" period group volume rate\n", | |
"0 1999 A 1000.0 3.7\n", | |
"1 1999 C 1200.0 4.5\n", | |
"2 1999 B 0.0 0.0\n", | |
"3 2000 A 0.0 0.0\n", | |
"4 2000 C 800.0 4.6\n", | |
"5 2000 B 900.0 3.9" | |
] | |
}, | |
"execution_count": 34, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"periods = pd.Series(data.period.unique(), name='period').to_frame()\n", | |
"groups = pd.Series(data.group.unique(), name='group').to_frame()\n", | |
"product = periods.merge(groups, how='cross')\n", | |
"data = pd.merge(data, product, left_on=['period', 'group'], right_on=['period', 'group'], how='right')\n", | |
"data['volume'] = data['volume'].fillna(0)\n", | |
"data['rate'] = data['rate'].fillna(0)\n", | |
"data\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"period\n", | |
"1999 4.136364\n", | |
"2000 4.229412\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 35, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(period_rates := (\n", | |
" data\n", | |
" .groupby('period')\n", | |
" .apply(lambda x: x['rate'] * x['volume'] / x['volume'].sum()).groupby('period')\n", | |
" .sum()\n", | |
"))\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"period\n", | |
"1999 NaN\n", | |
"2000 0.093048\n", | |
"dtype: float64" | |
] | |
}, | |
"execution_count": 36, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"period_rates.diff()\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>period</th>\n", | |
" <th>group</th>\n", | |
" <th>volume</th>\n", | |
" <th>rate</th>\n", | |
" <th>total</th>\n", | |
" <th>period_rate</th>\n", | |
" <th>share</th>\n", | |
" <th>rate_lag</th>\n", | |
" <th>share_lag</th>\n", | |
" <th>period_rate_lag</th>\n", | |
" <th>inner</th>\n", | |
" <th>mix</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1999</td>\n", | |
" <td>A</td>\n", | |
" <td>1000.0</td>\n", | |
" <td>3.7</td>\n", | |
" <td>3700.0</td>\n", | |
" <td>4.136364</td>\n", | |
" <td>0.454545</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1999</td>\n", | |
" <td>C</td>\n", | |
" <td>1200.0</td>\n", | |
" <td>4.5</td>\n", | |
" <td>5400.0</td>\n", | |
" <td>4.136364</td>\n", | |
" <td>0.545455</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1999</td>\n", | |
" <td>B</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>4.136364</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2000</td>\n", | |
" <td>A</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>4.229412</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>3.7</td>\n", | |
" <td>0.454545</td>\n", | |
" <td>4.136364</td>\n", | |
" <td>-0.000000</td>\n", | |
" <td>-1.681818</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2000</td>\n", | |
" <td>C</td>\n", | |
" <td>800.0</td>\n", | |
" <td>4.6</td>\n", | |
" <td>3680.0</td>\n", | |
" <td>4.229412</td>\n", | |
" <td>0.470588</td>\n", | |
" <td>4.5</td>\n", | |
" <td>0.545455</td>\n", | |
" <td>4.136364</td>\n", | |
" <td>0.047059</td>\n", | |
" <td>-0.336898</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>2000</td>\n", | |
" <td>B</td>\n", | |
" <td>900.0</td>\n", | |
" <td>3.9</td>\n", | |
" <td>3510.0</td>\n", | |
" <td>4.229412</td>\n", | |
" <td>0.529412</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>4.136364</td>\n", | |
" <td>2.064706</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" period group volume rate total period_rate share rate_lag \\\n", | |
"0 1999 A 1000.0 3.7 3700.0 4.136364 0.454545 NaN \n", | |
"1 1999 C 1200.0 4.5 5400.0 4.136364 0.545455 NaN \n", | |
"2 1999 B 0.0 0.0 0.0 4.136364 0.000000 NaN \n", | |
"3 2000 A 0.0 0.0 0.0 4.229412 0.000000 3.7 \n", | |
"4 2000 C 800.0 4.6 3680.0 4.229412 0.470588 4.5 \n", | |
"5 2000 B 900.0 3.9 3510.0 4.229412 0.529412 0.0 \n", | |
"\n", | |
" share_lag period_rate_lag inner mix \n", | |
"0 NaN NaN NaN NaN \n", | |
"1 NaN NaN NaN NaN \n", | |
"2 NaN NaN NaN NaN \n", | |
"3 0.454545 4.136364 -0.000000 -1.681818 \n", | |
"4 0.545455 4.136364 0.047059 -0.336898 \n", | |
"5 0.000000 4.136364 2.064706 0.000000 " | |
] | |
}, | |
"execution_count": 39, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"decomp = data.copy()\n", | |
"decomp['total'] = decomp.eval('volume * rate')\n", | |
"decomp['period_rate'] = (\n", | |
" decomp.groupby('period')['total'].transform('sum') /\n", | |
" decomp.groupby('period')['volume'].transform('sum')\n", | |
")\n", | |
"decomp['share'] = (\n", | |
" decomp['volume'] /\n", | |
" decomp.groupby('period')['volume'].transform('sum')\n", | |
")\n", | |
"decomp['rate_lag'] = decomp.groupby('group')['rate'].shift(1)\n", | |
"decomp['share_lag'] = decomp.groupby('group')['share'].shift(1)\n", | |
"decomp['period_rate_lag'] = decomp.groupby('group')['period_rate'].shift(1)\n", | |
"decomp['inner'] = decomp.eval('share * (rate - rate_lag)')\n", | |
"decomp['mix'] = decomp.eval('(share - share_lag) * rate_lag')\n", | |
"decomp\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Inner: 2.112\n", | |
"Mix: -2.019\n" | |
] | |
} | |
], | |
"source": [ | |
"import math\n", | |
"\n", | |
"assert math.isclose(decomp['inner'].sum() + decomp['mix'].sum(), period_rates.diff().sum())\n", | |
"print(\n", | |
" f'Inner: {decomp.query(\"period == 2000\")[\"inner\"].sum():.3f}\\n'\n", | |
" f'Mix: {decomp.query(\"period == 2000\")[\"mix\"].sum():.3f}'\n", | |
")\n" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment