Last active
August 22, 2020 15:47
-
-
Save nhatley/4ed3f3ced6aa44ade7531cf49a135467 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#code from start to finish | |
##install or update packages if neccessary | |
#install.packages("tidyverse") | |
#install.packages("haven") | |
##load packages in | |
library(tidyverse) #loads all "core" tidyverse packages like dplyr, tidyr, forcats, and ggplot2 | |
library(haven) | |
##read dataset in with value labels (as_factor) | |
Apr17 <- read_sav("Apr17 public.sav", user_na = TRUE) %>% as_factor() | |
##create trump_approval variable and relevel it | |
Apr17 <- Apr17 %>% | |
mutate(trump_approval = case_when( | |
q1 == "Approve" & q1a == "Very strongly" ~ "Strongly approve", | |
q1 == "Approve" & q1a != "Very strongly" ~ "Not strongly approve", | |
q1 == "Disapprove" & q1a == "Very strongly" ~ "Strongly disapprove", | |
q1 == "Disapprove" & q1a != "Very strongly" ~ "Not strongly disapprove", | |
q1 == "Don't know/Refused (VOL.)" | q1a == "Don't know/Refused (VOL.)" ~ "Refused" | |
) #this parentheses closes our call to case_when | |
%>% #and then sends it to fct_relevel with %>% | |
fct_relevel("Strongly approve", | |
"Not strongly approve", | |
"Not strongly disapprove", | |
"Strongly disapprove", | |
"Refused" | |
) #this parentheses closes our call to fct_relevel | |
) #this parentheses closes our call to mutate | |
## collapse education variable into 3 categories | |
Apr17 <- Apr17 %>% | |
mutate(educ_cat = fct_collapse(educ2, | |
"High school grad or less" = c( | |
"Less than high school (Grades 1-8 or no formal schooling)", | |
"High school incomplete (Grades 9-11 or Grade 12 with NO diploma)", | |
"High school graduate (Grade 12 with diploma or GED certificate)" | |
), | |
"Some college" = c( | |
"Some college, no degree (includes some community college)", | |
"Two year associate degree from a college or university" | |
), | |
"College grad+" = c( | |
"Four year college or university degree/Bachelor's degree (e.g., BS, BA, AB)", | |
"Some postgraduate or professional schooling, no postgraduate degree", | |
"Postgraduate or professional degree, including master's, doctorate, medical or law degree" | |
) | |
) #this parentheses closes our call to fct_collapse | |
) #this parentheses closes our call to mutate | |
##get trump_approval weighted totals | |
trump_approval <- Apr17 %>% | |
group_by(trump_approval) %>% | |
summarise(weighted_n = sum(weight)) | |
##get trump_approval weighted proportions | |
trump_approval <- Apr17 %>% | |
##group by trump_approval to calculated weighted totals by taking the sum of the weights | |
group_by(trump_approval) %>% | |
summarise(weighted_n = sum(weight)) %>% | |
##add the weighted_group_size to get the total weighted n and | |
##divide weighted_n by weighted_group_size to get the proportions | |
mutate(weighted_group_size = sum(weighted_n), | |
weighted_estimate = weighted_n / weighted_group_size | |
) | |
##get trump_approval by education | |
trump_estimates_educ <- Apr17 %>% | |
#group by educ and trump approval to get weighted n's per group | |
group_by(educ_cat, trump_approval) %>% | |
#calculate the total number of people in each answer and education category using survey weights (weight) | |
summarise(weighted_n = sum(weight)) %>% | |
#group by education to calculate education category size | |
group_by(educ_cat) %>% | |
#add columns for total group size and the proportion | |
mutate(weighted_group_size = sum(weighted_n), | |
weighted_estimate = weighted_n/weighted_group_size) | |
##select only columns interested in for this analysis | |
###rename psraid to resp_id | |
Apr17 <- Apr17 %>% | |
select(resp_id = psraid, weight, trump_approval, educ_cat, racethn, gen5) | |
##create Apr_17 long with gather | |
Apr17_long <- Apr17 %>% | |
#gather educ_cat, racethn, gen5 into two columns: | |
##a key called "subgroup variable" (educ_cat, racethn, gen5) | |
##and a value called "subgroup" | |
gather(key = subgroup_variable, value = subgroup, educ_cat, racethn, gen5) | |
##get weighted estimates for every subgroup | |
trump_estimates <- Apr17_long %>% | |
#group by subgroup_variable, subgroup, and trump approval to get weighted n of approval/disapproval for all subgroup cats | |
group_by(subgroup_variable, subgroup, trump_approval) %>% | |
#calculate the total number of people in each answer and education category using survey weights (weight) | |
summarise(weighted_n = sum(weight)) %>% | |
#group by subgroup only to calculate subgroup category size | |
group_by(subgroup) %>% | |
#add columns for total group size and the proportion | |
mutate(weighted_group_size = sum(weighted_n), | |
weighted_estimate = weighted_n/weighted_group_size) | |
#only want proportions so select out total categories | |
trump_estimates <- trump_estimates %>% | |
select(-weighted_n, -weighted_group_size) | |
##create plot | |
trump_estimates %>% | |
##remove "Refused" category for Trump Approval | |
filter(trump_approval != "Refused") %>% | |
##remove Refused categories in our subgroup values | |
filter(!(subgroup %in% c("Don't know/Refused (VOL.)", "DK/Ref"))) %>% | |
ggplot( | |
aes( | |
x = weighted_estimate, | |
y = subgroup | |
) | |
) + | |
geom_point() + | |
scale_x_continuous(limits = c(0, .8), | |
breaks = seq(0, .6, by = .2), | |
labels = scales::percent(seq(0, .6, by = .2), accuracy = 1) | |
) + | |
facet_grid(cols = vars(trump_approval), | |
rows = vars(subgroup_variable), | |
scales = "free_y", | |
space = "free" | |
) + | |
theme_bw() + | |
theme(axis.title.y = element_blank()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment