4  Reconcile Demographic Variables

4.1 Adding Race and Gender Answers from Year 1 Survey

Questions about the race and gender of CEOs and Board Chairs are presented to Year 2 respondents only if they indicate that a leadership change has occurred. Hence, we need to impute race and gender responses from the Year 1 survey for participants who did not indicate a change in leadership.

4.1.1 Identifying cases for imputation

These respondents indicate that there were no changes to their CEO in year 2.

# Logical flag per survey row: TRUE when the respondent reported hiring a new
# CEO or appointing an interim CEO (either indicator equals 1).
have.changes <- with(
  survey_df,
  LeadershipChng_HireCEO == 1 | LeadershipChng_IntrmCEO == 1
)

# EINs of respondents with no reported CEO change. dplyr::filter() keeps only
# rows whose condition is TRUE, so rows where the flag is NA are excluded too.
no_ceo_chng_ein <- dplyr::pull(
  dplyr::filter( survey_df, ! have.changes ),
  "EIN"
)

# EINs of respondents whose Board Chair change indicator is present and not 1.
no_bchair_chng_ein <- dplyr::pull(
  dplyr::filter( survey_df, LeadershipChng_ChngBC != 1 ),
  "EIN"
)

4.1.2 Processing Year 1 Data

In Year 1, the survey questions for race and gender are each encoded in a single factor variable, instead of the multiple boolean variables used in Year 2. We therefore have to wrangle the Year 1 data into the Year 2 format.

# Directory and file name of the Year 1 intermediate data set.
fpath <- "DATA-PREP/01-year-one/02-data-intermediate/"
fname <- "wave-01-data-intermediate.csv"

# Read the Year 1 responses from the concatenated path.
year1_raw <-
  paste0( fpath, fname ) %>%
  readr::read_csv()
# Wrangle Year 1 responses on CEO race and gender into one 0/1 indicator
# column per category, named like the Year 2 columns (CEOrace_*, CEOgender_*).
#
# Rows kept: EINs with no reported CEO change in Year 2 whose Year 1 race and
# gender codes are neither -99 nor NA. Codes outside the listed values fall
# back to "Oth".
#
# The labels are stored as factors that carry every category, and both pivots
# use `names_expand = TRUE`, so a column exists (filled with 0) even for a
# category nobody selected in Year 1. Otherwise such columns are absent and
# cannot be looked up by name when the Year 1 answers are merged into Year 2.
year1_CEOchng <- year1_raw %>% 
  dplyr::select(EIN, CEOrace, CEOgender) %>% 
  dplyr::filter(EIN %in% no_ceo_chng_ein,
                ! CEOrace %in% c(-99, NA),
                ! CEOgender %in% c(-99, NA)) %>% 
  dplyr::mutate(
    CEOrace = factor(
      dplyr::case_match(
        CEOrace,
        1 ~ "AAPI", 2 ~ "Black", 3 ~ "Hisp", 4 ~ "NativeAm", 5 ~ "White", 6 ~ "Bi", 7 ~ "Oth",
        .default = "Oth"
      ),
      levels = c("AAPI", "Black", "Hisp", "NativeAm", "White", "Bi", "Oth")
    ),
    CEOgender = factor(
      dplyr::case_match(
        CEOgender,
        1 ~ "Man", 2 ~ "Woman", 3 ~ "Trans", 4 ~ "NB", 5 ~ "Oth",
        .default = "Oth"
      ),
      levels = c("Man", "Woman", "Trans", "NB", "Oth")
    ),
    # Constant 1s that become the "checked" value of the indicator columns.
    race_check = 1,
    gender_check = 1
  ) %>% 
  tidyr::pivot_wider(
    names_from = CEOrace,
    names_glue = "CEOrace_{CEOrace}",
    values_from = race_check, 
    values_fill = 0,
    names_expand = TRUE
  ) %>% 
  tidyr::pivot_wider(
    names_from = CEOgender,
    names_glue = "CEOgender_{CEOgender}",
    values_from = gender_check, 
    values_fill = 0,
    names_expand = TRUE
  )

# Wrangle Year 1 responses on Board Chair race and gender into one 0/1
# indicator column per category, named like the Year 2 columns
# (BChairrace_*, BChairgender_*).
#
# Rows kept: EINs with no reported Board Chair change in Year 2 whose Year 1
# race and gender codes are neither -99 nor NA. Codes outside the listed
# values fall back to "Oth".
#
# The labels are stored as factors that carry every category, and both pivots
# use `names_expand = TRUE`, so a column exists (filled with 0) even for a
# category nobody selected in Year 1. Otherwise such columns are absent and
# cannot be looked up by name when the Year 1 answers are merged into Year 2.
year1_BCchng <- year1_raw %>% 
  dplyr::select(EIN, BCrace, BCgender) %>% 
  dplyr::filter(EIN %in% no_bchair_chng_ein,
                ! BCrace %in% c(-99, NA),
                ! BCgender %in% c(-99, NA)) %>% 
  dplyr::mutate(
    BCrace = factor(
      dplyr::case_match(
        BCrace,
        1 ~ "AAPI", 2 ~ "Black", 3 ~ "Hisp", 4 ~ "NativeAm", 5 ~ "White", 6 ~ "Bi", 7 ~ "Oth",
        .default = "Oth"
      ),
      levels = c("AAPI", "Black", "Hisp", "NativeAm", "White", "Bi", "Oth")
    ),
    BCgender = factor(
      dplyr::case_match(
        BCgender,
        1 ~ "Man", 2 ~ "Woman", 3 ~ "Trans", 4 ~ "NB", 5 ~ "Oth",
        .default = "Oth"
      ),
      levels = c("Man", "Woman", "Trans", "NB", "Oth")
    ),
    # Constant 1s that become the "checked" value of the indicator columns.
    race_check = 1,
    gender_check = 1
  ) %>% 
  tidyr::pivot_wider(
    names_from = BCrace,
    names_glue = "BChairrace_{BCrace}",
    values_from = race_check, 
    values_fill = 0,
    names_expand = TRUE
  ) %>% 
  tidyr::pivot_wider(
    names_from = BCgender,
    names_glue = "BChairgender_{BCgender}",
    values_from = gender_check, 
    values_fill = 0,
    names_expand = TRUE
  )

4.1.3 Create Biracial Categories in Year 2

Since the Year 1 data has an indicator for biracial CEOs and Board Chairs, we will have to create a boolean Biracial indicator variable in the Year 2 data that returns a 1 if a CEO or Board Chair belongs to 2 or more racial groups.

# Year 2 race indicator columns, split by role.
race_ceo_qns_bool <- 
  race_gender_qns_bool[ grepl( "CEOrace", race_gender_qns_bool ) ]

race_bchair_qns_bool <- 
  race_gender_qns_bool[ grepl( "BChairrace", race_gender_qns_bool ) ]

race_qns <- c( race_ceo_qns_bool, race_bchair_qns_bool )

# Recode the Yes/No answers to 1/0 and store them as numeric.
RULES <- c(    "    Yes  =>>   1    ", 
               "    No   =>>   0    "    )

rules <- parse_rules( RULES )          
pattern <- rules[[ "pattern" ]]
replace <- rules[[ "replace" ]]

survey_df[ race_qns ] <- 
  survey_df[ race_qns ] %>%
  lapply( recode_x, pattern, replace ) %>%
  lapply( as.numeric )

# Number of race boxes checked per respondent. rowSums() is vectorised, so
# survey_df is not converted with dplyr::rowwise(); a row-wise data frame would
# make every later mutate() evaluate one row at a time.
n_ceo_races    <- unname( rowSums( survey_df[ race_ceo_qns_bool ],    na.rm = TRUE ) )
n_bchair_races <- unname( rowSums( survey_df[ race_bchair_qns_bool ], na.rm = TRUE ) )

# Biracial indicator: 1 when two to six race boxes are checked, otherwise 0.
# Because NAs are dropped from the sum, respondents with no race answers at
# all also get 0 here.
survey_df$CEOrace_Bi    <- ifelse( dplyr::between( n_ceo_races,    2, 6 ), 1, 0 )
survey_df$BChairrace_Bi <- ifelse( dplyr::between( n_bchair_races, 2, 6 ), 1, 0 )

4.1.4 Merge Year 1 Race and Gender Variables with Year 2

race_ceo_qns_bool <- c( race_ceo_qns_bool, "CEOrace_Bi" )

# Overwrite the Year 2 CEO race indicators with the Year 1 answers for every
# EIN present in year1_CEOchng.
#
# Values are looked up by EIN with match(). Passing the whole Year 1 column as
# the `yes` argument of ifelse() pairs values by row position instead of by
# organization, which assigns the wrong organization's answer.
year1_row <- match( survey_df$EIN, year1_CEOchng$EIN )  # NA = EIN not in Year 1
has_year1 <- ! is.na( year1_row )

for ( race_var in race_ceo_qns_bool ) {
  # Indicators without a Year 1 column are left at their Year 2 values.
  if ( ! race_var %in% names( year1_CEOchng ) ) next

  survey_df[[ race_var ]][ has_year1 ] <-
    year1_CEOchng[[ race_var ]][ year1_row[ has_year1 ] ]
}

# Year 2 CEO gender indicator columns.
gender.qs <- grepl( "CEOgender", race_gender_qns_bool )
gender_ceo_qns_bool <- race_gender_qns_bool[ gender.qs ]

COLUMNS <- gender_ceo_qns_bool

# Recode the Yes/No answers to 1/0 and store them as numeric.
RULES <- c(    "    Yes   =>>   1    ", 
               "     No   =>>   0    "    )

rules <- parse_rules( RULES )          
pattern <- rules[[ "pattern" ]]
replace <- rules[[ "replace" ]]

survey_df[ COLUMNS ] <- 
  survey_df[ COLUMNS ] %>%
  lapply( recode_x, pattern, replace )

survey_df[ COLUMNS ] <- 
  survey_df[ COLUMNS ] %>%
  lapply( as.numeric )

# lapply( survey_df[ COLUMNS ], table )

# Overwrite the Year 2 CEO gender indicators with the Year 1 answers for every
# EIN present in year1_CEOchng.
#
# Values are looked up by EIN with match(). Passing the whole Year 1 column as
# the `yes` argument of ifelse() pairs values by row position instead of by
# organization, which assigns the wrong organization's answer.
year1_row <- match( survey_df$EIN, year1_CEOchng$EIN )  # NA = EIN not in Year 1
has_year1 <- ! is.na( year1_row )

for ( gender_var in gender_ceo_qns_bool ) {
  # Indicators without a Year 1 column are left at their Year 2 values.
  if ( ! gender_var %in% names( year1_CEOchng ) ) next

  survey_df[[ gender_var ]][ has_year1 ] <-
    year1_CEOchng[[ gender_var ]][ year1_row[ has_year1 ] ]
}

race_bchair_qns_bool <- c(race_bchair_qns_bool, "BChairrace_Bi")

COLUMNS <- race_bchair_qns_bool

# Recode Yes/No to 1/0 and store as numeric.
# NOTE(review): the Board Chair race columns are already recoded and converted
# to numeric when the biracial indicator is built, so this pass looks
# redundant -- confirm recode_x() leaves 0/1 values unchanged before removing.
RULES <- c(    
  
  "  Yes =>> 1  ", 
  "  No  =>> 0  "    )

rules <- parse_rules( RULES )          
pattern <- rules[[ "pattern" ]]
replace <- rules[[ "replace" ]]

survey_df[ COLUMNS ] <- 
  survey_df[ COLUMNS ] %>%
  lapply( recode_x, pattern, replace )

# lapply( survey_df[ COLUMNS ], table )

survey_df[ COLUMNS ] <- 
  survey_df[ COLUMNS ] %>%
  lapply( as.numeric )

# Overwrite the Year 2 Board Chair race indicators with the Year 1 answers for
# every EIN present in year1_BCchng.
#
# Values are looked up by EIN with match(). Passing the whole Year 1 column as
# the `yes` argument of ifelse() pairs values by row position instead of by
# organization, which assigns the wrong organization's answer.
year1_row <- match( survey_df$EIN, year1_BCchng$EIN )  # NA = EIN not in Year 1
has_year1 <- ! is.na( year1_row )

for ( race_var in race_bchair_qns_bool ) {
  # Indicators without a Year 1 column are left at their Year 2 values.
  if ( ! race_var %in% names( year1_BCchng ) ) next

  survey_df[[ race_var ]][ has_year1 ] <-
    year1_BCchng[[ race_var ]][ year1_row[ has_year1 ] ]
}

# Year 2 Board Chair gender indicator columns.
gender_bchair_qns_bool <- 
  race_gender_qns_bool[ grepl("BChairgender", race_gender_qns_bool) ]

COLUMNS <- gender_bchair_qns_bool

# Recode answers to 1/0 and store as numeric.
# NOTE(review): "Female" and "Male" both map to 1 -- presumably each appears
# only in its own checkbox column as the "checked" value; confirm against the
# raw data.
RULES <- c(     "       Yes  =>>   1    ", 
                "       No   =>>   0    ",
                "    Female  =>>   1    ", 
                "    Male    =>>   1    "      )

rules <- parse_rules( RULES )          
pattern <- rules[[ "pattern" ]]
replace <- rules[[ "replace" ]]

survey_df[ COLUMNS ] <- 
  survey_df[ COLUMNS ] %>%
  lapply( recode_x, pattern, replace )

# lapply( survey_df[ COLUMNS ], table )

survey_df[ COLUMNS ] <- 
  survey_df[ COLUMNS ] %>%
  lapply( as.numeric )

# Overwrite the Year 2 Board Chair gender indicators with the Year 1 answers
# for every EIN present in year1_BCchng.
#
# Values are looked up by EIN with match(). Passing the whole Year 1 column as
# the `yes` argument of ifelse() pairs values by row position instead of by
# organization, which assigns the wrong organization's answer.
year1_row <- match( survey_df$EIN, year1_BCchng$EIN )  # NA = EIN not in Year 1
has_year1 <- ! is.na( year1_row )

for ( gender_var in gender_bchair_qns_bool ) {
  # Indicators without a Year 1 column are left at their Year 2 values.
  if ( ! gender_var %in% names( year1_BCchng ) ) next

  survey_df[[ gender_var ]][ has_year1 ] <-
    year1_BCchng[[ gender_var ]][ year1_row[ has_year1 ] ]
}

4.1.5 Recode New Race and Gender Variables

#' Convert survey columns to labelled memisc items
#'
#' Applies `memisc::as.item()` to each column named in `qns`, passing a named
#' vector of value labels and the set of values to treat as missing.
#'
#' @param survey_data Data frame (or list) holding the survey columns.
#' @param qns Columns to convert, in any form accepted by the `.at` argument
#'   of `purrr::modify_at()`.
#' @param recode_vals Values that receive a label.
#' @param recode_labs Labels for `recode_vals`, in the same order.
#' @param missing_vals Values forwarded as `missing.values`.
#' @return `survey_data` with the selected columns replaced by the result of
#'   `memisc::as.item()`.
create_survey_item <- function( 
    survey_data, 
    qns, 
    recode_vals, 
    recode_labs, 
    missing_vals ){

  # Named vector: names are the labels, elements are the coded values.
  value_labels <- stats::setNames( recode_vals, recode_labs )

  purrr::modify_at(
    survey_data,
    .at = qns,
    .f = memisc::as.item,
    labels = value_labels,
    missing.values = missing_vals )
}
# Label every 0/1 race and gender indicator column as a survey item with
# 0 = "No", 1 = "Yes", 99 = "Incomplete" (99 declared as missing). The four
# column sets are processed in turn: CEO race, CEO gender, Board Chair race,
# Board Chair gender.
indicator_sets <- list(
  race_ceo_qns_bool,
  gender_ceo_qns_bool,
  race_bchair_qns_bool,
  gender_bchair_qns_bool
)

for ( indicator_cols in indicator_sets ) {
  survey_df <- create_survey_item(
    survey_df,
    indicator_cols,
    recode_vals = c(0, 1, 99),
    recode_labs = c("No", "Yes", "Incomplete"),
    missing_vals = c(99)
  )
}

4.1.6 Create single Race/Gender column

While our Year 2 race and gender data are stored as individual binary columns, this is not the case for the Year 1 and Year 3 results: both of those years contain a single race column and a single gender column for CEOs and for Board Chairs. For easier comparability, we create new variables that aggregate the individual binary columns into a single race value and a single gender value for each role.

4.1.6.1 Aggregated Race Variable

| Original Value | Description | Recode Label | Recode Value | Code as Missing? |
|---|---|---|---|---|
| 1 | Asian/Pacific Islander | AAPI | 1 | No |
| 1 | Black/African American | Black | 2 | No |
| 1 | Latinx/Hispanic | Hisp | 3 | No |
| 1 | Native American/American Indian | NativeAm | 4 | No |
| 1 | White | White | 5 | No |
| 1 | Bi/Multi-racial | Bi | 6 | No |
| 1 | Other (please specify) | Oth | 7 | No |
| 0 | Checkbox Unchecked | | NA | Yes |
# Create new single-column race variables (codes 1-7) from the 0/1 indicators.
#
# case_when() returns the first matching branch, so the Bi/multi-racial check
# must come first: Year 2 respondents flagged as Bi also have two or more
# single-race indicators set to 1, and would otherwise be assigned to whichever
# of those appears first and never be coded as 6. Rows matching no branch
# get NA.
survey_df <- survey_df %>%
  dplyr::mutate(
    CEOrace = dplyr::case_when(
      CEOrace_Bi == 1 ~ 6,
      CEOrace_AAPI == 1 ~ 1,
      CEOrace_Black == 1 ~ 2,
      CEOrace_Hisp == 1 ~ 3,
      CEOrace_NativeAm == 1 ~ 4,
      CEOrace_White == 1 ~ 5,
      CEOrace_Oth == 1 ~ 7,
      .default = NA
    ),
    BChairrace = dplyr::case_when(
      BChairrace_Bi == 1 ~ 6,
      BChairrace_AAPI == 1 ~ 1,
      BChairrace_Black == 1 ~ 2,
      BChairrace_Hisp == 1 ~ 3,
      BChairrace_NativeAm == 1 ~ 4,
      BChairrace_White == 1 ~ 5,
      BChairrace_Oth == 1 ~ 7,
      .default = NA
  ))

# Create Survey Item: attach the category labels to codes 1-7; no value is
# declared as missing.
survey_df <- create_survey_item(
  survey_df, 
  c("CEOrace", "BChairrace"), 
  recode_vals = c(1, 2, 3, 4, 5, 6, 7), 
  recode_labs = c("AAPI", "Black", "Hisp", "NativeAm", "White", "Bi", "Oth"), 
  missing_vals = c()
)

4.1.6.2 Aggregated Gender Variable

| Original Value | Description | Recode Label | Recode Value | Code as Missing? |
|---|---|---|---|---|
| 1 | Man | Man | 1 | No |
| 1 | Woman | Woman | 2 | No |
| 1 | Trans | Trans | 3 | No |
| 1 | Gender non-conforming/Non-Binary | NB | 4 | No |
| 1 | Other (please specify) | Oth | 5 | No |
| 0 | Checkbox Unchecked | | NA | Yes |
# Create new single-column gender variables (codes 1-5) from the 0/1
# indicators. The first indicator equal to 1, in the order Man, Woman, Trans,
# NB, Oth, decides the code; rows matching no branch get NA.
survey_df <- dplyr::mutate(
  survey_df,
  CEOgender = dplyr::case_when(
    CEOgender_Man   == 1 ~ 1,
    CEOgender_Woman == 1 ~ 2,
    CEOgender_Trans == 1 ~ 3,
    CEOgender_NB    == 1 ~ 4,
    CEOgender_Oth   == 1 ~ 5,
    .default = NA
  )
)

survey_df <- dplyr::mutate(
  survey_df,
  BChairgender = dplyr::case_when(
    BChairgender_Man   == 1 ~ 1,
    BChairgender_Woman == 1 ~ 2,
    BChairgender_Trans == 1 ~ 3,
    BChairgender_NB    == 1 ~ 4,
    BChairgender_Oth   == 1 ~ 5,
    .default = NA
  )
)

# Attach the category labels to codes 1-5; no value is declared as missing.
survey_df <- create_survey_item(
  survey_data  = survey_df,
  qns          = c("CEOgender", "BChairgender"),
  recode_vals  = c(1, 2, 3, 4, 5),
  recode_labs  = c("Man", "Woman", "Trans", "NB", "Oth"),
  missing_vals = c()
)

4.1.7 Validate Results

We can see that the number of valid responses to the race and gender questions concerning the CEO and Board Chair has increased.

# All race/gender columns to inspect: the 0/1 indicators followed by the four
# aggregated single-column variables.
aggregated_qns  <- c( "CEOrace", "CEOgender", "BChairrace", "BChairgender" )
race_gender_qns <- c( race_gender_qns_bool, aggregated_qns )

# Render the codebook for those columns as HTML.
show_html( codebook( survey_df[ race_gender_qns ] ) )

CEOrace_AAPI

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 467 99 . 8 67 . 6
1 ‘Yes’ 1 0 . 2 0 . 1
NA M 223 32 . 3

CEOrace_Black

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 462 98 . 7 66 . 9
1 ‘Yes’ 6 1 . 3 0 . 9
NA M 223 32 . 3

CEOrace_Hisp

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 464 99 . 1 67 . 1
1 ‘Yes’ 4 0 . 9 0 . 6
NA M 223 32 . 3

CEOrace_NativeAm

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 464 99 . 1 67 . 1
1 ‘Yes’ 4 0 . 9 0 . 6
NA M 223 32 . 3

CEOrace_White

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 12 2 . 6 1 . 7
1 ‘Yes’ 456 97 . 4 66 . 0
NA M 223 32 . 3

CEOrace_Oth

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 468 100 . 0 67 . 7
1 ‘Yes’ 0 0 . 0 0 . 0
NA M 223 32 . 3

CEOgender_Man

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 454 97 . 0 65 . 7
1 ‘Yes’ 14 3 . 0 2 . 0
NA M 223 32 . 3

CEOgender_Woman

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 14 3 . 0 2 . 0
1 ‘Yes’ 454 97 . 0 65 . 7
NA M 223 32 . 3

CEOgender_Trans

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 48 98 . 0 6 . 9
1 ‘Yes’ 1 2 . 0 0 . 1
NA M 642 92 . 9

CEOgender_NB

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 467 99 . 8 67 . 6
1 ‘Yes’ 1 0 . 2 0 . 1
NA M 223 32 . 3

CEOgender_Oth

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 468 100 . 0 67 . 7
1 ‘Yes’ 0 0 . 0 0 . 0
NA M 223 32 . 3

BChairrace_AAPI

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 491 99 . 4 71 . 1
1 ‘Yes’ 3 0 . 6 0 . 4
NA M 197 28 . 5

BChairrace_Black

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 472 95 . 5 68 . 3
1 ‘Yes’ 22 4 . 5 3 . 2
NA M 197 28 . 5

BChairrace_Hisp

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 477 96 . 6 69 . 0
1 ‘Yes’ 17 3 . 4 2 . 5
NA M 197 28 . 5

BChairrace_NativeAm

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 490 99 . 2 70 . 9
1 ‘Yes’ 4 0 . 8 0 . 6
NA M 197 28 . 5

BChairrace_White

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 34 6 . 9 4 . 9
1 ‘Yes’ 460 93 . 1 66 . 6
NA M 197 28 . 5

BChairrace_Oth

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 492 99 . 6 71 . 2
1 ‘Yes’ 2 0 . 4 0 . 3
NA M 197 28 . 5

BChairgender_Man

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 410 83 . 0 59 . 3
1 ‘Yes’ 84 17 . 0 12 . 2
NA M 197 28 . 5

BChairgender_Woman

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 83 16 . 8 12 . 0
1 ‘Yes’ 411 83 . 2 59 . 5
NA M 197 28 . 5

BChairgender_Trans

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 164 100 . 0 23 . 7
1 ‘Yes’ 0 0 . 0 0 . 0
NA M 527 76 . 3

BChairgender_NB

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 494 100 . 0 71 . 5
1 ‘Yes’ 0 0 . 0 0 . 0
NA M 197 28 . 5

BChairgender_Oth

Storage mode: double
Measurement: nominal
Missing values: 99

Values and labels N Valid Total
0 ‘No’ 492 99 . 6 71 . 2
1 ‘Yes’ 2 0 . 4 0 . 3
NA M 197 28 . 5

CEOrace

Storage mode: double
Measurement: nominal

Values and labels N Valid Total
1 ‘AAPI’ 1 0 . 2 0 . 1
2 ‘Black’ 6 1 . 3 0 . 9
3 ‘Hisp’ 3 0 . 6 0 . 4
4 ‘NativeAm’ 4 0 . 9 0 . 6
5 ‘White’ 454 97 . 0 65 . 7
6 ‘Bi’ 0 0 . 0 0 . 0
7 ‘Oth’ 0 0 . 0 0 . 0
NA M 223 32 . 3

CEOgender

Storage mode: double
Measurement: nominal

Values and labels N Valid Total
1 ‘Man’ 14 3 . 0 2 . 0
2 ‘Woman’ 453 96 . 8 65 . 6
3 ‘Trans’ 1 0 . 2 0 . 1
4 ‘NB’ 0 0 . 0 0 . 0
5 ‘Oth’ 0 0 . 0 0 . 0
NA M 223 32 . 3

BChairrace

Storage mode: double
Measurement: nominal

Values and labels N Valid Total
1 ‘AAPI’ 3 0 . 6 0 . 4
2 ‘Black’ 20 4 . 0 2 . 9
3 ‘Hisp’ 14 2 . 8 2 . 0
4 ‘NativeAm’ 3 0 . 6 0 . 4
5 ‘White’ 453 91 . 7 65 . 6
6 ‘Bi’ 0 0 . 0 0 . 0
7 ‘Oth’ 1 0 . 2 0 . 1
NA M 197 28 . 5

BChairgender

Storage mode: double
Measurement: nominal

Values and labels N Valid Total
1 ‘Man’ 84 17 . 0 12 . 2
2 ‘Woman’ 408 82 . 6 59 . 0
3 ‘Trans’ 0 0 . 0 0 . 0
4 ‘NB’ 0 0 . 0 0 . 0
5 ‘Oth’ 2 0 . 4 0 . 3
NA M 197 28 . 5