Last active
November 19, 2021 12:38
-
-
Save jkbjh/961db26168d6e50f5c631ca618415d03 to your computer and use it in GitHub Desktop.
Plot covid incidence / death numbers for selected countries (austria+neighbors+uk)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Plot covid incidence / death numbers for selected countries (austria+neighbors+uk)."""
import countryinfo  # for populations
import matplotlib.pyplot as plt
import pandas as pd

# Number of extra positions added past the end of the data when building the x-axis tick range.
FUTURE = 28

# Population per region, keyed by the region name used in the data set.
# "Czechia" is pre-seeded because its population is looked up under "Czech Republic";
# other regions are looked up by their own name and cached here on first use.
POPULATION = {
    "Czechia": countryinfo.CountryInfo("Czech Republic").population(),
}

# Incidence values are expressed per this many inhabitants.
NORMALIZE_TO = 100000

# Length of the rolling window (in data points) and spacing of the x-axis ticks.
WINDOW_DAYS = 7

# Time series of confirmed cases.
URL = "https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
# Time series of deaths.
URL_DEATHS = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
if __name__ == "__main__":
    # Download the confirmed-case and death time series.
    covid19 = pd.read_csv(URL)
    covid19_deaths = pd.read_csv(URL_DEATHS)

    # no provinces: keep only the rows whose "Province/State" cell is empty
    covid19 = covid19[covid19["Province/State"].isna()]
    covid19_deaths = covid19_deaths[covid19_deaths["Province/State"].isna()]
def prepare_data(covid19, region):
    """Compute the rolling incidence per NORMALIZE_TO inhabitants for one region.

    Args:
        covid19: DataFrame whose first four columns are 'Province/State',
            'Country/Region', 'Lat', 'Long', followed by one column per data point.
        region: Value to match in the 'Country/Region' column. It is also the key
            used for the population lookup.

    Returns:
        A tuple ``(data_renamed, new_cases, last_value)``:
        the rolling-sum incidence frame with its first column renamed to
        ``"<region> <last value>"`` (the column name becomes the legend label),
        the frame of differences between consecutive data points, and the most
        recent incidence value.

    Raises:
        ValueError: If no row of ``covid19`` matches ``region``.
    """
    region_rows = covid19[covid19["Country/Region"] == region]
    if region_rows.empty:
        # Without this check the failure surfaces later as a bare IndexError
        # when the last value is extracted from an empty row.
        raise ValueError(f"no rows for region {region!r} in the given data")

    # Skip the four leading metadata columns and put the data points in rows.
    counts_in_rows = region_rows[region_rows.columns[4:]].T
    new_cases = counts_in_rows.diff()

    # Look the population up once per region and cache it in POPULATION.
    if region not in POPULATION:
        POPULATION[region] = countryinfo.CountryInfo(region).population()
    print(f"population {region}: {POPULATION[region]}")

    # 7 day incidence (window length is WINDOW_DAYS)
    seven_day_incidence = (new_cases / POPULATION[region] * NORMALIZE_TO).rolling(window=WINDOW_DAYS).sum()

    # The latest data point is shown in the legend, so we need its value ...
    last_value = seven_day_incidence.iloc[-1].tolist()[0]
    # ... and since the legend label is generated from the column name, rename the column.
    data_renamed = seven_day_incidence.rename(
        columns={seven_day_incidence.columns[0]: "%s %3.1f" % (region, last_value)}
    )
    return data_renamed, new_cases, last_value
def plot_for_region(region="Austria", axes=None):
    """Plot the case incidence (solid) and death incidence (dashed) of one region.

    Both curves are drawn on a logarithmic y-axis and share one color.

    Args:
        region: Region name, forwarded to ``prepare_data`` for both data sets.
        axes: Axes to draw on; passed through unchanged as ``ax`` to ``DataFrame.plot``.

    Returns:
        A tuple ``(number of rows in the new-cases frame, line color used)``.
    """
    data_renamed, new_cases, last_value = prepare_data(covid19, region)
    death_renamed, new_death, last_death_value = prepare_data(covid19_deaths, region)

    axes = data_renamed.plot(
        logy=True,  # in the logy scale the growth is linear. use your ruler to predict the future :-/
        grid=True,
        xticks=range(0, len(new_cases) + FUTURE, WINDOW_DAYS),  # get a label for one day per week on the x-axis
        label=str(last_value),
        ax=axes,
    )
    # Reuse the color of the line just drawn so the deaths curve matches the cases curve.
    line_color = axes.get_lines()[-1].get_color()

    death_renamed.plot(
        logy=True,
        grid=True,
        xticks=range(0, len(new_death) + FUTURE, WINDOW_DAYS),  # get a label for one day per week on the x-axis
        label=str(last_death_value),  # the deaths value, not the cases value
        ax=axes,
        linestyle="dashed",
        color=line_color,
    )
    return len(new_cases), line_color
if __name__ == "__main__":
    # Start from a clean figure with a single subplot shared by all regions.
    plt.close()
    axes = plt.subplot(111)
    plt.title("%s day incidence per %d" % (WINDOW_DAYS, NORMALIZE_TO))

    # Austria first, then its neighbors, then the UK (plot order determines line colors).
    for country in [
        "Austria",
        "Germany",
        "Switzerland",
        "Liechtenstein",
        "Italy",
        "Slovenia",
        "Hungary",
        "Slovakia",
        "Czechia",
        "United Kingdom",
    ]:
        plot_for_region(country, axes=axes)

    # Fix the lower y limit at 0.01 and keep the upper limit chosen by the plots.
    axes.set_ylim(0.01, axes.get_ylim()[1])
    plt.xticks(rotation="vertical")
    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment