Created
March 30, 2019 16:51
-
-
Save wmay/8c36ec20d3f98eebbce4eeebd1664416 to your computer and use it in GitHub Desktop.
Quick analysis of recent sex trends (2008-2018) with GSS data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## looking at sex trends | |
library(foreign) | |
## oh wow this is a large file | |
gss = read.spss('/home/will/research/gss/GSS7218_R1.sav', to.data.frame = T) | |
## do a bit of organizing | |
names(gss) = tolower(names(gss)) | |
gss$wtssall = as.numeric(as.character(gss$wtssall)) | |
gss$age = as.numeric(as.character(gss$age)) | |
## a bit of exploratory data analysis | |
year_counts = table(gss$year) | |
plot(as.integer(names(year_counts)), as.numeric(year_counts)) | |
## let's subset this for later | |
gss = subset(gss, year >= 1988) | |
useful_vars = c('year', 'wtssall', 'sex', 'age', 'race', | |
'marital', 'sexfreq', 'partners') | |
gss = gss[, useful_vars] | |
save(gss, file = 'gss_sex.Rdata') | |
## hypothesis tests | |
library(reshape2) | |
library(ggplot2) | |
load('gss_sex.Rdata') | |
## organize the data a bit | |
gss$no_sex = gss$sexfreq == 'NOT AT ALL' | |
gss$no_partners = gss$partners == 'NO PARTNERS' | |
## also subset by age for these analyses | |
gss = subset(gss, age <= 30) | |
id_vars = c('year', 'wtssall', 'sex', 'age', 'race', | |
'marital') | |
long_gss = melt(gss, id.vars = id_vars, measure.vars = c('no_sex', 'no_partners')) | |
## 1) trend (linear regression with percents) | |
## check some graphs | |
ysex = aggregate(wtssall ~ variable + year + value, FUN = sum, data = long_gss) | |
ysex_df = dcast(ysex, variable + year ~ value, value.var = 'wtssall') | |
ysex_df$no_sex_p = ysex_df$`TRUE` / (ysex_df$`FALSE` + ysex_df$`TRUE`) | |
ggplot(ysex_df, aes(year, no_sex_p)) + | |
geom_line() + | |
facet_wrap(~ variable, nrow = 2) | |
## split up by sex | |
ysex2 = aggregate(wtssall ~ variable + year + sex + value, FUN = sum, data = long_gss) | |
ysex2_df = dcast(ysex2, variable + year + sex ~ value, value.var = 'wtssall') | |
ysex2_df$no_sex_p = ysex2_df$`TRUE` / (ysex2_df$`FALSE` + ysex2_df$`TRUE`) | |
ggplot(ysex2_df, aes(year, no_sex_p, color = sex)) + | |
geom_line() + | |
facet_wrap(~ variable, nrow = 2) | |
## linear probability model | |
lp1 = lm(no_sex ~ year, data = gss, weights = gss$wtssall) | |
lp2 = lm(no_sex ~ year * sex, data = gss, weights = gss$wtssall) | |
lp3 = lm(no_partners ~ year, data = gss, subset = year >= 2008, weights = gss$wtssall) | |
lp4 = lm(no_partners ~ year * sex, data = gss, subset = year >= 2008, weights = gss$wtssall) | |
## same but with logistic regression | |
lr1 = glm(no_sex ~ year, data = gss, weights = gss$wtssall, family = 'binomial') | |
lr2 = glm(no_sex ~ year * sex, data = gss, weights = gss$wtssall, family = 'binomial') | |
lr3 = glm(no_partners ~ year, data = gss, subset = year >= 2008, | |
weights = gss$wtssall, family = 'binomial') | |
lr4 = glm(no_partners ~ year * sex, data = gss, subset = year >= 2008, | |
weights = gss$wtssall, family = 'binomial') | |
## 2) 2018 significantly higher than 2008 (2-sample t-test) | |
## actually nevermind this is more of a prop.test kind of thing | |
## no_sex | |
no_sex_tab = subset(ysex_df, year %in% c(2008, 2018) & | |
variable == 'no_partners', | |
select = c(`TRUE`, `FALSE`)) | |
no_sex_tab = as.table(as.matrix(no_sex_tab)) | |
prop.test(no_sex_tab, alternative = 'less') | |
## men only | |
no_sex_tab = subset(ysex2_df, year %in% c(2008, 2018) & | |
variable == 'no_partners' & | |
sex == 'MALE', | |
select = c(`TRUE`, `FALSE`)) | |
no_sex_tab = as.table(as.matrix(no_sex_tab)) | |
prop.test(no_sex_tab, alternative = 'less') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment