Created
August 18, 2022 14:29
-
-
Save jcrist/f808ceba485ab1ca7580299297b59281 to your computer and use it in GitHub Desktop.
Snippets used in a twitter thread about ibis-datasette
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
In [1]: import ibis | |
In [2]: ibis.options.interactive = True # enable interactive mode | |
In [3]: con = ibis.sqlite.connect("legislators.db") # connect to a database | |
In [4]: legislators = con.tables["legislators"] # access tables | |
In [5]: legislators.groupby("bio_gender").count() # query using a dataframe-like API | |
Out[5]: | |
bio_gender count | |
0 F 399 | |
1 M 12195 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
In [6]: terms = con.tables["legislator_terms"] | |
In [7]: first_female_rep = ( | |
...: legislators | |
...: .join(terms, legislators.id == terms.legislator_id) | |
...: .filter(lambda _: _.bio_gender == "F") | |
...: .select("name", "state", "start") | |
...: .sort_by("start") | |
...: .limit(1) | |
...: ) | |
In [8]: first_female_rep | |
Out[8]: | |
name state start | |
0 Jeannette Rankin MT 1917-04-02 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
In [9]: percent_female_by_decade = ( | |
...: legislators | |
...: .join(terms, legislators.id == terms.legislator_id) | |
...: .select("bio_gender", "start") | |
...: .mutate( | |
...: decade=lambda _: (ibis.date(_.start).year() / 10).cast("int32") * 10 | |
...: ) | |
...: .group_by("decade") | |
...: .aggregate( | |
...: n_female=lambda _: (_.bio_gender == "F").sum(), | |
...: n_total=lambda _: _.count() | |
...: ) | |
...: .mutate( | |
...: percent_female=lambda _: 100 * (_.n_female / _.n_total) | |
...: ) | |
...: .filter(lambda _: _.percent_female > 0) | |
...: .select("decade", "percent_female") | |
...: ) | |
In [10]: percent_female_by_decade | |
Out[10]: | |
decade percent_female | |
0 1910 0.040584 | |
1 1920 0.883179 | |
2 1930 1.608363 | |
3 1940 1.845166 | |
4 1950 3.030303 | |
5 1960 2.718287 | |
6 1970 3.592073 | |
7 1980 4.977188 | |
8 1990 10.830922 | |
9 2000 15.865783 | |
10 2010 20.196641 | |
11 2020 27.789047 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
In [11]: ibis.show_sql(percent_female_by_decade) | |
SELECT | |
t0.decade, | |
t0.percent_female | |
FROM ( | |
SELECT | |
t1.decade AS decade, | |
t1.n_female AS n_female, | |
t1.n_total AS n_total, | |
t1.percent_female AS percent_female | |
FROM ( | |
SELECT | |
t2.decade AS decade, | |
t2.n_female AS n_female, | |
t2.n_total AS n_total, | |
( | |
t2.n_female / CAST(t2.n_total AS REAL) | |
) * 100 AS percent_female | |
FROM ( | |
SELECT | |
t3.decade AS decade, | |
SUM(CAST(t3.bio_gender = 'F' AS INTEGER)) AS n_female, | |
COUNT('*') AS n_total | |
FROM ( | |
SELECT | |
t4.bio_gender AS bio_gender, | |
t4.start AS start, | |
CAST(CAST(STRFTIME('%Y', DATE(t4.start)) AS INTEGER) / CAST(10 AS REAL) AS INTEGER) * 10 AS decade | |
FROM ( | |
SELECT | |
bio_gender, | |
start | |
FROM main.legislators AS t5 | |
JOIN main.legislator_terms AS t6 | |
ON t5.id = t6.legislator_id | |
) AS t4 | |
) AS t3 | |
GROUP BY | |
t3.decade | |
) AS t2 | |
) AS t1 | |
WHERE | |
t1.percent_female > 0 | |
) AS t0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment