Last active
April 27, 2018 17:53
-
-
Save jkaupp/1456f79362b5f05270868416d083a9e3 to your computer and use it in GitHub Desktop.
STEM Funding Plot
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(qualtRics) | |
library(rlang) | |
library(viridis) | |
library(hrbrthemes) | |
registerOptions(api_token="TOTALLY_NOT_MY_TOKEN", root_url="https://queensu.qualtrics.com") | |
surveys <- getSurveys() | |
srvy_id <- surveys %>% | |
filter(grepl("STEM", name)) %>% | |
pull(id) | |
nserc_funding_data <- getSurvey(surveyID = srvy_id, force_request = TRUE) | |
funding_levels <- c("$0-$2,000", "$2,001-$5,000", "$5,001-$10,000", "$10,001-$20,000", "$20,001-$50,000", "$50,001-$100,000", "$100,001-$200,000", "$200,001-$500,000", "$500,001-$1,000,000", "$over 1 million") | |
labels <- nserc_funding_data %>% | |
select(matches("Q[2-5]")) %>% | |
map_chr(function(x) attributes(x)$label) %>% | |
tibble(item = names(.), | |
label = .) | |
resp <- filter(labels, grepl("Q5", item)) %>% | |
pull(label) | |
tidy_survey <- nserc_funding_data %>% | |
select(matches("Q[2-5]")) %>% | |
gather(item, value, -Q2, -Q3, -Q2_TEXT, -Q3_TEXT,na.rm = TRUE) %>% | |
mutate(question = str_extract(item, "Q\\d")) %>% | |
rename(primary_appointment = Q2, | |
funding_in = Q3) %>% | |
mutate_at(c("primary_appointment", "funding_in"), function(x) replace(x, grepl("other", x,ignore.case = TRUE), NA)) %>% | |
mutate_all(as.character) %>% | |
mutate(primary_appointment = coalesce(primary_appointment, Q2_TEXT), | |
funding_in = coalesce(funding_in, Q3_TEXT)) %>% | |
select(question, item, value, primary_appointment, funding_in) %>% | |
left_join(labels) | |
# Faceted plot by Primary Appointment ----- | |
discipline_n <- nserc_funding_data %>% | |
rename(primary_appointment = Q2, | |
funding_in = Q3) %>% | |
mutate_at(c("primary_appointment", "funding_in"), function(x) replace(x, grepl("other", x,ignore.case = TRUE), NA)) %>% | |
mutate_all(as.character) %>% | |
mutate(primary_appointment = coalesce(primary_appointment, Q2_TEXT), | |
funding_in = coalesce(funding_in, Q3_TEXT)) %>% | |
count(primary_appointment) %>% | |
filter(primary_appointment %in% c("Education","Engineering", "Science")) | |
plot_data <- tidy_survey %>% | |
filter(question == "Q5", primary_appointment != 'Technology') %>% | |
filter(primary_appointment %in% c("Education","Engineering","Science")) %>% | |
mutate(value = stringi::stri_replace_last_regex(value, "\\$", "")) %>% | |
mutate(value = factor(value, stringi::stri_replace_last_regex(funding_levels, "\\$", ""))) %>% | |
filter(value != "$0-2,000") %>% | |
mutate(value = fct_drop(value)) %>% | |
select(-funding_in) %>% | |
count(label, value, primary_appointment) %>% | |
ungroup() %>% | |
complete(label, value, nesting(primary_appointment), fill = list(n = 0)) %>% | |
left_join(discipline_n, by = "primary_appointment") %>% | |
mutate(label = factor(label, rev(resp))) %>% | |
mutate(percent = n.x/n.y) %>% | |
mutate(percent = replace(percent, is.nan(percent), 0)) %>% | |
ungroup() %>% | |
mutate(primary_appointment = glue::glue("{primary_appointment}: {n.y} responses")) %>% | |
mutate(value = fct_recode(value, "> 1-million" = "over 1 million")) | |
plot <- ggplot(plot_data, aes(x = value, y = label, fill = percent)) + | |
geom_tile(color = "white") + | |
scale_fill_viridis("Response Frequency", option = "cividis", labels = scales::percent) + | |
scale_x_discrete(expand=c(0,0)) + | |
scale_y_discrete(expand=c(0,0)) + | |
labs(title = "Sources of STEM Education Research Funding in Canada", | |
subtitle = "Results from a survey of funding sources of Canadian STEM education researchers", | |
caption = "Survey conducted by a Special Interest Group within the Canadian Engineering Education Association", | |
x = NULL, y = NULL) + | |
facet_wrap(~primary_appointment, nrow = 1 ) + | |
coord_equal() + | |
theme_ipsum_rc(base_size = 16, grid = FALSE, plot_title_size = 20, subtitle_size = 18) + | |
theme(legend.position = "bottom", | |
legend.key.width = unit(1, "cm"), | |
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) | |
ggsave("STEM ED Funding by Primary Appointment.png", plot, width = 15, height = 10) | |
# Combined plot ---- | |
total_n <- nserc_funding_data %>% | |
filter(!is.na(Q2)) %>% | |
nrow() | |
combined_plot_data <- tidy_survey %>% | |
filter(question == "Q5", primary_appointment != 'Technology') %>% | |
filter(primary_appointment %in% c("Education","Engineering","Science")) %>% | |
mutate(value = stringi::stri_replace_last_regex(value, "\\$", "")) %>% | |
mutate(value = factor(value, stringi::stri_replace_last_regex(funding_levels, "\\$", ""))) %>% | |
filter(value != "$0-2,000") %>% | |
mutate(value = fct_drop(value)) %>% | |
select(-funding_in) %>% | |
count(label, value) %>% | |
ungroup() %>% | |
complete(label, value, fill = list(n = 0)) %>% | |
mutate(label = factor(label, rev(resp))) %>% | |
mutate(percent = n/total_n) %>% | |
mutate(percent = replace(percent, is.nan(percent), 0)) %>% | |
ungroup() %>% | |
mutate(value = fct_recode(value, "> 1-million" = "over 1 million")) | |
combined_plot <- ggplot(combined_plot_data, aes(x = value, y = label, fill = percent)) + | |
geom_tile(color = "white") + | |
scale_fill_viridis("Response Frequency", option = "cividis", labels = scales::percent) + | |
scale_x_discrete(expand=c(0,0)) + | |
scale_y_discrete(expand=c(0,0)) + | |
labs(title = "Sources of STEM Education Research Funding in Canada", | |
subtitle = "Results from a survey of funding sources of Canadian STEM education researchers", | |
caption = "Survey conducted by a Special Interest Group within the Canadian Engineering Education Association", | |
x = NULL, y = NULL) + | |
coord_equal() + | |
theme_ipsum_rc(base_size = 16, grid = FALSE, plot_title_size = 20, subtitle_size = 18) + | |
theme(legend.position = "bottom", | |
legend.key.width = unit(1, "cm"), | |
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) | |
ggsave("Overall STEM ED Funding.png", combined_plot, width = 15, height = 10) | |
recoded_eng_data <- tidy_survey %>% | |
filter(question == "Q5", primary_appointment != 'Technology') %>% | |
filter(primary_appointment == "Engineering") %>% | |
mutate(value = stringi::stri_replace_last_regex(value, "\\$", "")) %>% | |
mutate(value = factor(value, stringi::stri_replace_last_regex(funding_levels, "\\$", ""))) %>% | |
filter(value != "$0-2,000") %>% | |
mutate(value = fct_drop(value)) %>% | |
select(-funding_in) %>% | |
count(label, value) %>% | |
ungroup() %>% | |
complete(label, value, fill = list(n = 0)) %>% | |
mutate(label = factor(label, rev(resp))) %>% | |
mutate(percent = n/eng_total) %>% | |
mutate(percent = replace(percent, is.nan(percent), 0)) %>% | |
ungroup() %>% | |
mutate(value = fct_recode(value, "> 1-million" = "over 1 million")) %>% | |
mutate(label = trimws(label)) %>% | |
mutate( | |
label = fct_collapse( | |
label, | |
institutional = c( | |
"Funding from your Department", | |
"Funding from your Faculty/Dean's office", | |
"Funding from your University" | |
), | |
provincial = c( | |
"HEQCO", | |
"Provincial ministry" | |
), | |
federal = c("Federal ministry"), | |
ext_council = c( | |
"Non-Canadian research granting council (e.g. NSF)", | |
"Mitacs", | |
"Provincial council on articulation and transfer (e.g. BCCAT, ONCAT, etc.)", | |
"Non-profit organization" | |
), | |
private = c("Private industry", "Benefactor/private donor"), | |
other = "Other", | |
tri_council = c( | |
"CIHR", | |
"SSHRC graduate student award", | |
"SSHRC grant", | |
#"NSERC Chair in Design Engineering", | |
"NSERC graduate student award", | |
"NSERC grant" | |
) | |
) | |
) | |
summarized_data <- recoded_eng_data %>% | |
filter(label != "other") %>% | |
group_by(label) %>% | |
summarize(n = sum(n)) %>% | |
mutate(percent = n/eng_total) | |
axis_labels <- c("institutional" = "Institutional", | |
"tri_council" = "Tri-Council", | |
"provincial" = "Provincial Agencies & Ministry", | |
"ext_council" = "Non-Profit & US Councils", | |
"private" = "Private Benefactors & Funding", | |
"federal" = "Federal Agencies & Ministry") | |
source_plot <- ggplot(summarized_data, aes(x = reorder(label, percent), y =percent)) + | |
geom_col(fill = viridis(1, option = "cividis")) + | |
geom_text(aes(label = scales::percent(percent)), color = "white", family = "Roboto Condensed", hjust = 1, nudge_y = -0.01, data = filter(summarized_data, label != "federal")) + | |
geom_text(aes(label = scales::percent(percent)), color = "black", nudge_y = 0.05, family = "Roboto Condensed", data = filter(summarized_data, label == "federal")) + | |
coord_flip() + | |
scale_x_discrete(labels = function(x) axis_labels[x]) + | |
scale_y_continuous(labels = scales::percent, limits = c(0,1)) + | |
labs(title = "Primary Sources of Engineering Education Research Funding in Canada", | |
subtitle = str_wrap(glue::glue("Frequency of Engineering education funding by source, of {eng_total} responses. Results from a survey of funding sources of Canadian STEM education researchers."), 80), | |
caption = "Survey conducted by a Special Interest Group within the Canadian Engineering Education Association", | |
x = NULL, y = NULL) + | |
theme_ipsum_rc(base_size = 16, grid = "X", plot_title_size = 20, subtitle_size = 18) | |
ggsave("Frequency of Overall ENG ED Funding by source.png", source_plot, width = 16, height = 10) | |
summarized_funding <- recoded_eng_data %>% | |
filter(label != "other", label != "NSERC Chair in Design Engineering") %>% | |
mutate(value = gsub("\\$", "", value), | |
value = gsub("\\,", "", value)) %>% | |
mutate(value = ifelse(value == "> 1-million", "1000001-1000000", value)) %>% | |
separate(value, c("lower","upper"), sep = "-") %>% | |
mutate(lower = as.numeric(lower) -1) %>% | |
mutate(average = as.numeric(lower) + (as.numeric(upper) - as.numeric(lower))/2) %>% | |
mutate(funds = average * n) %>% | |
group_by(label) %>% | |
summarize(funds = sum(funds)) | |
funding_plot <- ggplot(summarized_funding, aes(x = reorder(label, funds), y = funds)) + | |
geom_col(fill = viridis(1, option = "cividis")) + | |
geom_text(aes(label = scales::dollar(funds)), color = "white", nudge_y = -25000, family = "Roboto Condensed", hjust = 1) + | |
#geom_text(aes(label = scales::dollar(funds)), color = "black", nudge_y = 150000, family = "Roboto Condensed", data = filter(summarized_funding, label == "federal")) + | |
coord_flip() + | |
scale_x_discrete(labels = function(x) axis_labels[x]) + | |
scale_y_continuous(labels = scales::dollar) + | |
labs(title = "Engineering Education Research Funding in Canada", | |
subtitle = str_wrap(glue::glue("Average Amount of Engineering Education funding by source, from {eng_total} responses. Average funding calculated as the mean value of a funding range reported (e.g. $2,000 - $5,000 = $3,500)."), 80), | |
caption = "\nData: Survey of funding sources of Canadian STEM education researchers. | Survey conducted by a Special Interest Group within the Canadian Engineering Education Association | Graphic: @jakekaupp", | |
x = NULL, y = NULL) + | |
theme_ipsum_rc(base_size = 16, grid = "X", plot_title_size = 20, subtitle_size = 18) | |
ggsave("Overall ENG ED Funding Amounts by source.png", funding_plot, width = 16, height = 10) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment