library(tidyr) library(crunch) library(stringr) library(dplyr) library(purrr) login() #User input here: ds <- loadDataset("") new_ds_name <- "New stacked dataset" variables_to_retain <- c("retain_1", "retain_2", "retain_3") variables_to_stack <- list( 'q1' = c('q1_1', 'q1_2', 'q1_3', 'q1_4', 'q1_5', 'q1_6'), 'q2' = c('q2_1', 'q2_2', 'q2_3', 'q2_4', 'q2_5', 'q2_6'), 'q3' = c('q3_1', 'q3_2', 'q3_3', 'q3_4', 'q3_5', 'q3_6'), 'q4' = c('q4_1', 'q4_2', 'q4_3', 'q4_4', 'q4_5', 'q4_6'), 'q5' = c('q5_1', 'q5_2', 'q5_3', 'q5_4', 'q5_5', 'q5_6') ) occasion_labels <- c("occasion 1", "occasion 2", "occasion 3") # --- missing_vars <- unlist(variables_to_stack)[!(unlist(variables_to_stack) %in% aliases(allVariables(ds)))] if (length(missing_vars) > 0) { stop(paste0( "Could not find ", length(missing_vars), " stacking variables in original dataset: \n", "First 5 are: ", paste0("'", missing_vars[seq_len(min(length(missing_vars), 5))], "'", collapse = ", "), "\nSee `missing_vars` for full list." )) } keep_vars <- c( variables_to_retain, unique(str_replace(unname(unlist(variables_to_stack)), "\\$.+", "")) ) var_lookup <- function(x, ds) { components <- str_split(x, "\\$")[[1]] if (length(components) > 1) return(ds[[components[1]]][[components[2]]]) ds[[x]] } stack_array_info <- imap( variables_to_stack, function(stack_var_aliases, stack_name) { stack_vars <- map(stack_var_aliases, ~var_lookup(., ds)) var_types <- map_chr(stack_vars, type) if (!all(var_types == var_types[1])) { stop("Variables must all be of the same time to stack.") } aliases <- str_replace(stack_var_aliases, ".+\\$", "") if (!is.Array(stack_vars[[1]])) return(aliases) sv1_aliases <- aliases(subvariables(stack_vars[[1]])) out <- transpose(map(stack_vars, ~aliases(subvariables(.)))) setNames(out, sv1_aliases) } ) mr_vars <- keep(variables_to_stack, ~is.MR(var_lookup(.[[1]], ds))) %>% map(~categories(var_lookup(.[[1]], ds))) %>% map(~names(.[is.selected(.)])) spec_info <- stack_array_info %>% imap(function(var_info, out_name) { if (is.character(var_info)) setNames(list(var_info), out_name) else var_info }) %>% flatten() spec <- tibble( .name = unname(unlist(spec_info)), .value = rep(names(spec_info), lengths(spec_info)) ) %>% group_by(.value) %>% mutate(occasion = row_number()) if (!is.null(occasion_labels)) { if (length(occasion_labels) < max(spec$occasion)) { stop("Not enough occasion labels specified") } spec <- spec %>% mutate( occasion = factor(occasion, labels = occasion_labels) ) } stacked <- ds[keep_vars] %>% as.data.frame(force = TRUE) %>% pivot_longer_spec(spec) ds_stacked <- newDataset(stacked, name = new_ds_name) stack_array_info %>% keep(is.list) %>% imap(function(subvar_info, alias) { if (alias %in% names(mr_vars)) { makeMR(ds_stacked[names(subvar_info)], alias = alias, name = alias, selections = mr_vars[[alias]]) } else { makeArray(ds_stacked[names(subvar_info)], alias = alias, name = alias) } }) %>% addVariables(dataset = ds_stacked) iwalk(variables_to_stack, function(orig_aliases, new_alias) { orig_var <- var_lookup(orig_aliases[1], ds) new_var <- var_lookup(new_alias, ds_stacked) if (name(new_var) != name(orig_var) & name(orig_var) != alias(orig_var)) { name(new_var) <- name(orig_var) } description(new_var) <- description(orig_var) if (is.Array(new_var)) { names(subvariables(new_var)) <- names(subvariables(orig_var)) } }) map(variables_to_retain, function(orig_alias) { orig_var <- ds[[orig_alias]] if (is.MR(orig_var)) { orig_cats <- categories(orig_var) makeMR( ds_stacked[aliases(subvariables(orig_var))], alias = orig_alias, name = name(orig_var), description = description(orig_var), selections = names(orig_cats)[is.selected(orig_cats)] ) } else if (is.Array(orig_var)) { orig_subvars <- subvariables(orig_var) makeArray( ds_stacked[aliases(orig_subvars)], alias = orig_alias, name = name(orig_var), description = description(orig_var) ) } }) %>% keep(~!is.null(.)) %>% addVariables(dataset = ds_stacked) walk(variables_to_retain, function(orig_alias) { orig_var <- ds[[orig_alias]] name(ds_stacked[[orig_alias]]) <- name(orig_var) description(ds_stacked[[orig_alias]]) <- description(orig_var) if (is.Array(orig_var)) { names(subvariables(ds_stacked[[orig_alias]])) <- names(subvariables(orig_var)) } }) ds_stacked <- refresh(ds_stacked) webApp(ds_stacked) #then copy and tweak Crunch Automation from the unstacked dataset