Last active
December 10, 2024 23:53
-
-
Save carlthome/0ca8e2624654358c3e9442dd9d089a1d to your computer and use it in GitHub Desktop.
welch-s-t-test-for-independent-samples.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"authorship_tag": "ABX9TyOPlCR4Xyzt0objFeCNDNUM", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/carlthome/0ca8e2624654358c3e9442dd9d089a1d/welch-s-t-test-for-independent-samples.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 81 | |
}, | |
"id": "ifVSETFJpJyL", | |
"outputId": "30b0616c-71a0-4271-de36-6c99a6bc98d6" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
" Degree of freedom 1 Degree of freedom 2 T-test p-value\n", | |
"0 8 29 1.556304 0.136493" | |
], | |
"text/html": [ | |
"\n", | |
" <div id=\"df-53db22ef-667e-460f-b7ac-c4f7c3a48695\" class=\"colab-df-container\">\n", | |
" <div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Degree of freedom 1</th>\n", | |
" <th>Degree of freedom 2</th>\n", | |
" <th>T-test</th>\n", | |
" <th>p-value</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>8</td>\n", | |
" <td>29</td>\n", | |
" <td>1.556304</td>\n", | |
" <td>0.136493</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>\n", | |
" <div class=\"colab-df-buttons\">\n", | |
"\n", | |
" <div class=\"colab-df-container\">\n", | |
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-53db22ef-667e-460f-b7ac-c4f7c3a48695')\"\n", | |
" title=\"Convert this dataframe to an interactive table.\"\n", | |
" style=\"display:none;\">\n", | |
"\n", | |
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
" </svg>\n", | |
" </button>\n", | |
"\n", | |
" <style>\n", | |
" .colab-df-container {\n", | |
" display:flex;\n", | |
" gap: 12px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert {\n", | |
" background-color: #E8F0FE;\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: #1967D2;\n", | |
" height: 32px;\n", | |
" padding: 0 0 0 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert:hover {\n", | |
" background-color: #E2EBFA;\n", | |
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: #174EA6;\n", | |
" }\n", | |
"\n", | |
" .colab-df-buttons div {\n", | |
" margin-bottom: 4px;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert {\n", | |
" background-color: #3B4455;\n", | |
" fill: #D2E3FC;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert:hover {\n", | |
" background-color: #434B5C;\n", | |
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
" fill: #FFFFFF;\n", | |
" }\n", | |
" </style>\n", | |
"\n", | |
" <script>\n", | |
" const buttonEl =\n", | |
" document.querySelector('#df-53db22ef-667e-460f-b7ac-c4f7c3a48695 button.colab-df-convert');\n", | |
" buttonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
"\n", | |
" async function convertToInteractive(key) {\n", | |
" const element = document.querySelector('#df-53db22ef-667e-460f-b7ac-c4f7c3a48695');\n", | |
" const dataTable =\n", | |
" await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
" [key], {});\n", | |
" if (!dataTable) return;\n", | |
"\n", | |
" const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
" + ' to learn more about interactive tables.';\n", | |
" element.innerHTML = '';\n", | |
" dataTable['output_type'] = 'display_data';\n", | |
" await google.colab.output.renderOutput(dataTable, element);\n", | |
" const docLink = document.createElement('div');\n", | |
" docLink.innerHTML = docLinkHtml;\n", | |
" element.appendChild(docLink);\n", | |
" }\n", | |
" </script>\n", | |
" </div>\n", | |
"\n", | |
" </div>\n", | |
" </div>\n" | |
], | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "dataframe", | |
"summary": "{\n \"name\": \"pd\",\n \"rows\": 1,\n \"fields\": [\n {\n \"column\": \"Degree of freedom 1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 8,\n \"max\": 8,\n \"num_unique_values\": 1,\n \"samples\": [\n 8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Degree of freedom 2\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 29,\n \"max\": 29,\n \"num_unique_values\": 1,\n \"samples\": [\n 29\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"T-test\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 1.556304247842829,\n \"max\": 1.556304247842829,\n \"num_unique_values\": 1,\n \"samples\": [\n 1.556304247842829\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"p-value\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.13649272437710608,\n \"max\": 0.13649272437710608,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.13649272437710608\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" | |
} | |
}, | |
"metadata": {}, | |
"execution_count": 1 | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import scipy.stats as stats\n", | |
"\n", | |
"\n", | |
"def compute_t_test():\n", | |
" # Create synthetic data for testing.\n", | |
" group1 = np.random.normal(0.7, 0.4, size=9)\n", | |
" group2 = np.random.normal(0.5, 0.4, size=30)\n", | |
"\n", | |
" # Compute mean, variance and size of groups.\n", | |
" variance1 = np.var(group1, ddof=1)\n", | |
" variance2 = np.var(group2, ddof=1)\n", | |
"\n", | |
" df1 = len(group1) - 1\n", | |
" df2 = len(group2) - 1\n", | |
"\n", | |
" mean1 = np.mean(group1)\n", | |
" mean2 = np.mean(group2)\n", | |
"\n", | |
" size1 = len(group1)\n", | |
" size2 = len(group2)\n", | |
"\n", | |
" # T-test for means of two independent samples from descriptive statistics.\n", | |
" # This is a test for the null hypothesis that two independent samples have identical average (expected) values.\n", | |
" t_test, p_value = stats.ttest_ind_from_stats(\n", | |
" mean1=mean1, std1=np.sqrt(variance1), nobs1=size1,\n", | |
" mean2=mean2, std2=np.sqrt(variance2), nobs2=size2,\n", | |
" equal_var=False,\n", | |
" )\n", | |
"\n", | |
" # Collect results.\n", | |
" scores = {\n", | |
" \"Degree of freedom 1\": df1,\n", | |
" \"Degree of freedom 2\": df2,\n", | |
" \"T-test\": t_test,\n", | |
" \"p-value\": p_value,\n", | |
" }\n", | |
" return scores\n", | |
"\n", | |
"\n", | |
"scores = compute_t_test()\n", | |
"pd.DataFrame(scores, index=[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"%timeit compute_t_test()" | |
], | |
"metadata": { | |
"id": "9kb8JmaP_WwW", | |
"outputId": "3bc961e6-6d53-4791-d595-013f7976157a", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"186 µs ± 16.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"n = 0\n", | |
"m = 10000\n", | |
"for _ in range(m):\n", | |
" scores = compute_t_test()\n", | |
" n += int(scores[\"p-value\"] < 0.05)\n", | |
"n/m" | |
], | |
"metadata": { | |
"id": "JtlHWmpS9xW7", | |
"outputId": "5834083e-a686-402a-911e-04abf7957f38", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0.2266" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 3 | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment