Last active
December 1, 2019 20:49
-
-
Save bgall/9111603604c5b21320f54a0218286aad to your computer and use it in GitHub Desktop.
Function to generate arbitrary conjoint attributes with specified probabilities values are selected
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################## | |
# Define function: create_attributes | |
# | |
# Creates randomly generated vectors of values from sets | |
# of potential values for a specified number of attributes | |
# | |
# *Arguments* | |
# | |
# attr_names (optional) | |
# vector of attribute names of attr_n length. If no values | |
# provided, attribute named attrib1, attrib2, etc. | |
# | |
# attr_levels | |
# vector or list of vectors of length N, each containing the | |
# unique levels each attribute can take. May include NA values. | |
# | |
# n_profiles | |
# number of profiles, i.e. # of values to draw for each | |
# attribute | |
# | |
# probs | |
# A vector or list of numeric vectors containing probabilities | |
# for sampling from the set of unique attribute values. Probs | |
# are assigned to each element in the attrinute value set based | |
# on vector index. If no value supplied, defaults to assigning | |
# with equal probability. | |
# | |
# NOTE: Currently no test for whether the number of probabilities | |
# passed to probs() is equal to the number of levels for the | |
# attribute, only that the number of probability vectors | |
# passed is equal to the number of attributes. Also, no check | |
# the probabilities sum to 1. | |
############################################################## | |
create_attributes <- | |
function(attr_levels, | |
n_profiles, | |
attr_names = NULL, | |
probs = NULL) { | |
# Convert attr_levels and probs to list to work with lists | |
# throughout for simplicity/speed. Since vectors are list | |
# but lists not necessarily vectors, use is.list(X) rather | |
# than is.vector(X) since, passing a list to is.vector is | |
# also TRUE. | |
if (!is.list(attr_levels)) { | |
attr_levels <- list(attr_levels) | |
} | |
if (!is.null(probs) & !is.list(probs)) { | |
probs <- list(probs) | |
} | |
# Number of attributes to generate | |
attr_n <- length(attr_levels) | |
# Test inputs are correct length | |
stopifnot(is.null(attr_names) | attr_n == length(attr_names)) | |
stopifnot(is.null(probs) | attr_n == length(probs)) | |
# If attribute names not provided, generate | |
# default attribute names attrib1, attrib2, ... | |
if (is.null(attr_names)) { | |
attr_names <- paste0("attrib", seq_len(attr_n)) | |
} | |
# If probs not provided, generate default | |
# equal probabilities of assignment | |
if (is.null(probs)) { | |
probs <- list() | |
for (i in 1:attr_n) { | |
attr_i_levels_n <- length(attr_levels[[i]]) | |
probs[[i]] <- rep(1 / attr_i_levels_n, attr_i_levels_n) | |
} | |
} | |
# Initialize list to store each attribute vector | |
attrib_list <- list() | |
# Generate attributes | |
for (i in 1:length(attr_names)) { | |
# If an attribute has only one level, is | |
# numeric, and >0, the x = argument in | |
# sample() thinks the level refers to the | |
# number of levels, incorrectly. So, | |
# so, separate these out since prob = 1 | |
# for the single value. See ?sample | |
# Details for more info on this issue. | |
if (is.numeric(attr_levels[[i]]) & | |
length(attr_levels[[i]]) == 1) { | |
attrib_list[[i]] <- rep(attr_levels[[i]], n_profiles) | |
} else { | |
attrib_list[[i]] <- sample( | |
x = attr_levels[[i]], | |
size = n_profiles, | |
replace = TRUE, | |
prob = probs[[i]] | |
) | |
} | |
} | |
# Collapse list into data frame, replace names | |
attrib_df <- dplyr::bind_cols(attrib_list) | |
colnames(attrib_df) <- attr_names | |
# Return attribute data frame | |
attrib_df | |
} | |
############################################################## | |
# EXAMPLES WITH 50 PROFILES | |
############################################################## | |
# n_profiles <- 50 | |
# | |
# # One attribute, default probs | |
# attr_levels <- c("a","b","c") | |
# create_attributes(attr_levels = attr_levels, | |
# n_profiles = n_profiles) | |
# | |
# # One attribute,specified probs | |
# attr_levels <- c("a","b","c") | |
# probs <- c(0.10,0.10,0.80) | |
# create_attributes(attr_levels = attr_levels, | |
# n_profiles = n_profiles, | |
# probs = probs) | |
# | |
# # Two attributes, default probs | |
# attr_levels <- list(c("a","b","c"), | |
# c(seq_len(6))) | |
# | |
# create_attributes(attr_levels = attr_levels, | |
# n_profiles = n_profiles) | |
# | |
# # Two attributes, specified probs | |
# attr_levels <- list(c("a","b","c"), | |
# c(seq_len(6))) | |
# probs <- list(c(0.50, 0.30, 0.20), | |
# c(0.05, 0.05, 0.05, 0.05, 0.40, 0.40)) | |
# | |
# create_attributes(attr_levels = attr_levels, | |
# n_profiles = n_profiles, | |
# probs = probs) | |
# two attributes, specified probs and names | |
# attr_levels <- list(c("a","b","c"), | |
# c(seq_len(6))) | |
# | |
# probs <- list(c(0.50, 0.30, 0.20), | |
# c(0.05, 0.05, 0.05, 0.05, 0.40, 0.40)) | |
# | |
# attr_names <- c("foo", "bar") | |
# | |
# attr_df<- create_attributes(attr_levels = attr_levels, | |
# n_profiles = n_profiles, | |
# attr_names = attr_names, | |
# probs = probs) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment