4 Reconcile Demographic Variables

4.1 Adding Race and Gender Answers from Year 1 Survey

Questions about the race and gender of CEOs and Board Chairs are only presented to users if they indicate that a leadership change has occurred in Year 2 of the survey. Hence, we need to impute race and gender responses from the Year 1 survey for participants who did not indicate a change in leadership.

4.1.1 Identifying cases for imputation

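The code below identifies the respondents who reported no change of CEO (neither a new hire nor an interim CEO) in Year 2 and, separately, the respondents who reported no change of Board Chair.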

# TRUE where the respondent reported either a newly hired CEO or an interim
# CEO. The result is NA when neither flag equals 1 and at least one is missing.
hired_new_ceo   <- survey_df[[ "LeadershipChng_HireCEO" ]]  == 1
has_interim_ceo <- survey_df[[ "LeadershipChng_IntrmCEO" ]] == 1
have.changes    <- hired_new_ceo | has_interim_ceo

# EINs of organizations reporting no CEO change. which() keeps only the rows
# where the negated flag is TRUE, so rows with an NA flag are left out.
no_ceo_chng_ein <- 
  survey_df[[ "EIN" ]][ which( ! have.changes ) ]

# EINs of organizations reporting no Board Chair change (NA rows left out).
no_bchair_chng_ein <- 
  survey_df[[ "EIN" ]][ which( survey_df[[ "LeadershipChng_ChngBC" ]] != 1 ) ]

4.1.2 Processing Year 1 Data

In Year 1, the survey questions for race and gender are each encoded in a single factor variable instead of the multiple boolean variables used in Year 2. We will have to wrangle the Year 1 data into the Year 2 format.

fpath     <- "DATA-PREP/01-year-one/02-data-intermediate/"
fname     <- "wave-01-data-intermediate.csv"
year1_raw <- readr::read_csv( paste0( fpath, fname ) )

# Reshape the Year 1 single-column race / gender codes of one leadership role
# into Year 2 style 0/1 indicator columns.
#
# year1_data : Year 1 data frame holding EIN and the two coded columns.
# race_col   : name of the Year 1 race column (codes 1-7).
# gender_col : name of the Year 1 gender column (codes 1-5).
# keep_ein   : EINs to retain.
# prefix     : Year 2 column prefix; "CEO" yields CEOrace_* and CEOgender_*.
#
# Rows whose race or gender is -99 or NA are dropped. Any code outside the
# listed values is labelled "Oth". 
#
# Returns a tibble with EIN followed by one indicator column per race and per
# gender category. pivot_wider() only creates columns for categories that 
# actually occur in the filtered data, so categories that nobody selected are
# added afterwards as all-zero columns. Without this, those indicators could 
# not be imputed for Year 1 respondents and would stay missing.
wrangle_year1_demographics <- function( 
    year1_data, 
    race_col, 
    gender_col, 
    keep_ein, 
    prefix ){

  race_labels   <- c( "AAPI", "Black", "Hisp", "NativeAm", "White", "Bi", "Oth" )
  gender_labels <- c( "Man", "Woman", "Trans", "NB", "Oth" )

  wide <- 
    year1_data %>% 
    dplyr::select( EIN, 
                   race   = dplyr::all_of( race_col ), 
                   gender = dplyr::all_of( gender_col ) ) %>% 
    dplyr::filter( EIN %in% keep_ein,
                   ! race %in% c(-99, NA),
                   ! gender %in% c(-99, NA) ) %>% 
    dplyr::mutate(
      race = dplyr::case_match(
        race,
        1 ~ "AAPI", 2 ~ "Black", 3 ~ "Hisp", 4 ~ "NativeAm", 5 ~ "White", 6 ~ "Bi", 7 ~ "Oth",
        .default = "Oth"
      ),
      gender = dplyr::case_match(
        gender,
        1 ~ "Man", 2 ~ "Woman", 3 ~ "Trans", 4 ~ "NB", 5 ~ "Oth",
        .default = "Oth"
      ),
      race_check   = 1,
      gender_check = 1
    ) %>% 
    tidyr::pivot_wider(
      names_from  = race,
      names_glue  = paste0( prefix, "race_{race}" ),
      values_from = race_check, 
      values_fill = 0
    ) %>% 
    tidyr::pivot_wider(
      names_from  = gender,
      names_glue  = paste0( prefix, "gender_{gender}" ),
      values_from = gender_check, 
      values_fill = 0
    )

  # Add an all-zero indicator for every category absent from the data.
  expected_cols <- c( paste0( prefix, "race_",   race_labels ),
                      paste0( prefix, "gender_", gender_labels ) )

  for ( missing_col in setdiff( expected_cols, names( wide ) ) ){
    wide[[ missing_col ]] <- rep( 0, nrow( wide ) )
  }

  wide
}

# Wrangle Data for Year 1 Responses on CEO race and Gender
year1_CEOchng <- wrangle_year1_demographics(
  year1_raw, 
  race_col   = "CEOrace", 
  gender_col = "CEOgender", 
  keep_ein   = no_ceo_chng_ein, 
  prefix     = "CEO"
)

# Wrangle Data for Year 1 Responses on Board Chair Race and Gender
year1_BCchng <- wrangle_year1_demographics(
  year1_raw, 
  race_col   = "BCrace", 
  gender_col = "BCgender", 
  keep_ein   = no_bchair_chng_ein, 
  prefix     = "BChair"
)

4.1.3 Create Biracial Categories in Year 2

Since the Year 1 data has an indicator for biracial CEOs and Board Chairs, we will have to create a boolean Biracial indicator variable in the Year 2 data that returns a 1 if a CEO or Board Chair belongs to 2 or more racial groups.

# Year 2 race checkbox columns, split by leadership role.
race_ceo_qns_bool <- 
  race_gender_qns_bool[ grepl( "CEOrace", race_gender_qns_bool ) ]

race_bchair_qns_bool <- 
  race_gender_qns_bool[ grepl( "BChairrace", race_gender_qns_bool ) ]

race_qns <- c( race_ceo_qns_bool, race_bchair_qns_bool )

RULES <- c(    "    Yes  =>>   1    ", 
               "    No   =>>   0    "    )

rules <- parse_rules( RULES )          
pattern <- rules[[ "pattern" ]]
replace <- rules[[ "replace" ]]

# Recode the checkbox answers with the rules above, then store them as numeric.
survey_df[ race_qns ] <- 
  survey_df[ race_qns ] %>%
  lapply( recode_x, pattern, replace ) %>%
  lapply( as.numeric )

# Number of race boxes checked per respondent. Missing answers count as
# unchecked, so a respondent with no answers at all gets a count of 0.
ceo_checked    <- rowSums( survey_df[ race_ceo_qns_bool ],    na.rm = TRUE )
bchair_checked <- rowSums( survey_df[ race_bchair_qns_bool ], na.rm = TRUE )

# Bi = 1 when between 2 and 6 race boxes are checked, 0 otherwise.
# The counts are computed column-wise instead of through dplyr::rowwise(): 
# a row-wise data frame stays row-wise until it is ungrouped, which would make 
# every later mutate() on survey_df evaluate one row at a time.
survey_df <- 
  survey_df %>%
  dplyr::as_tibble() %>%
  dplyr::mutate(
    CEOrace_Bi    = ifelse( dplyr::between( ceo_checked,    2, 6 ), 1, 0 ),
    BChairrace_Bi = ifelse( dplyr::between( bchair_checked, 2, 6 ), 1, 0 )
  )

4.1.4 Merge Year 1 Race and Gender Variables with Year 2

# Copy Year 1 indicator values into the Year 2 survey data, matching on EIN.
#
# survey_data : Year 2 survey data frame with an EIN column.
# year1_data  : Year 1 indicator table with an EIN column. If an EIN occurs
#               more than once, its first row is used.
# vars        : names of the indicator columns to overwrite in survey_data.
#
# Returns survey_data in which, for every row whose EIN is found in 
# year1_data, the columns in `vars` hold the Year 1 value of that same EIN. 
# Rows without a Year 1 match keep their Year 2 value. Columns listed in 
# `vars` that year1_data does not contain are left unchanged and reported 
# with a message.
#
# The lookup uses match() because ifelse( test, yes, no ) takes `yes` by 
# position: passing a Year 1 column as `yes` would hand out Year 1 values by 
# row number rather than by organization.
impute_from_year1 <- function( survey_data, year1_data, vars ){

  stopifnot( all( vars %in% names( survey_data ) ) )

  year1_row <- match( survey_data[[ "EIN" ]], year1_data[[ "EIN" ]] )
  in_year1  <- ! is.na( year1_row )

  absent <- setdiff( vars, names( year1_data ) )
  if ( length( absent ) > 0 ){
    message( "Not imputed (no Year 1 column): ", 
             paste( absent, collapse = ", " ) )
  }

  for ( v in setdiff( vars, absent ) ){
    survey_data[[ v ]][ in_year1 ] <- 
      year1_data[[ v ]][ year1_row[ in_year1 ] ]
  }

  survey_data
}

# Drop any row-wise or grouped state so the steps below work on whole columns.
survey_df <- dplyr::ungroup( survey_df )

# ---- CEO race ----

race_ceo_qns_bool <- c( race_ceo_qns_bool, "CEOrace_Bi" )

survey_df <- 
  impute_from_year1( survey_df, year1_CEOchng, race_ceo_qns_bool )

# ---- CEO gender ----

gender.qs <- grepl( "CEOgender", race_gender_qns_bool )
gender_ceo_qns_bool <- race_gender_qns_bool[ gender.qs ]

COLUMNS <- gender_ceo_qns_bool

RULES <- c(    "    Yes   =>>   1    ", 
               "     No   =>>   0    "    )

rules <- parse_rules( RULES )          
pattern <- rules[[ "pattern" ]]
replace <- rules[[ "replace" ]]

# Recode the checkbox answers, then store them as numeric.
survey_df[ COLUMNS ] <- 
  survey_df[ COLUMNS ] %>%
  lapply( recode_x, pattern, replace ) %>%
  lapply( as.numeric )

# lapply( survey_df[ COLUMNS ], table )

survey_df <- 
  impute_from_year1( survey_df, year1_CEOchng, gender_ceo_qns_bool )

# ---- Board Chair race ----

race_bchair_qns_bool <- c(race_bchair_qns_bool, "BChairrace_Bi")

COLUMNS <- race_bchair_qns_bool

RULES <- c(    
  
  "  Yes =>> 1  ", 
  "  No  =>> 0  "    )

rules <- parse_rules( RULES )          
pattern <- rules[[ "pattern" ]]
replace <- rules[[ "replace" ]]

# NOTE(review): the Board Chair race columns were already recoded together 
# with the CEO race columns when the Bi indicator was built; this second pass 
# is presumably harmless and is kept as is. TODO confirm.
survey_df[ COLUMNS ] <- 
  survey_df[ COLUMNS ] %>%
  lapply( recode_x, pattern, replace ) %>%
  lapply( as.numeric )

# lapply( survey_df[ COLUMNS ], table )

survey_df <- 
  impute_from_year1( survey_df, year1_BCchng, race_bchair_qns_bool )

# ---- Board Chair gender ----

gender_bchair_qns_bool <- 
  race_gender_qns_bool[ grepl("BChairgender", race_gender_qns_bool) ]

COLUMNS <- gender_bchair_qns_bool

# NOTE(review): "Female" and "Male" both map to 1. Presumably some checkbox 
# columns store the option label instead of "Yes" when checked - verify 
# against the raw export.
RULES <- c(     "       Yes  =>>   1    ", 
                "       No   =>>   0    ",
                "    Female  =>>   1    ", 
                "    Male    =>>   1    "      )

rules <- parse_rules( RULES )          
pattern <- rules[[ "pattern" ]]
replace <- rules[[ "replace" ]]

survey_df[ COLUMNS ] <- 
  survey_df[ COLUMNS ] %>%
  lapply( recode_x, pattern, replace ) %>%
  lapply( as.numeric )

# lapply( survey_df[ COLUMNS ], table )

survey_df <- 
  impute_from_year1( survey_df, year1_BCchng, gender_bchair_qns_bool )

4.1.5 Recode New Race and Gender Variables

# Convert the selected columns of a survey data frame into memisc items.
#
# survey_data  : data frame holding the survey responses.
# qns          : names of the columns to convert.
# recode_vals  : values that receive a label.
# recode_labs  : labels, in the same order as recode_vals.
# missing_vals : values to declare as missing (may be empty).
#
# Returns survey_data with the columns named in qns replaced by the result of
# memisc::as.item(); all other columns are passed through unchanged.
create_survey_item <- function( 
    survey_data, 
    qns, 
    recode_vals, 
    recode_labs, 
    missing_vals ){

  # Named vector: names are the labels, elements are the labelled values.
  value_labels <- stats::setNames( recode_vals, recode_labs )

  to_item <- function( column ){
    memisc::as.item( column, 
                     labels         = value_labels, 
                     missing.values = missing_vals )
  }

  purrr::modify_at( survey_data, .at = qns, .f = to_item )
}

# All four groups of 0/1 indicator columns share the same labelling scheme:
# 0 = No, 1 = Yes, 99 = Incomplete (declared as missing).
indicator_groups <- list(
  race_ceo_qns_bool, 
  gender_ceo_qns_bool, 
  race_bchair_qns_bool, 
  gender_bchair_qns_bool
)

for ( indicator_qns in indicator_groups ){
  survey_df <- create_survey_item(
    survey_df, 
    indicator_qns, 
    recode_vals  = c(0, 1, 99), 
    recode_labs  = c("No", "Yes", "Incomplete"), 
    missing_vals = c(99)
  )
}

4.1.6 Create single Race/Gender column

While our race and gender variables are stored as individual binary columns, this is not the case for the Year 1 and Year 3 results. Both of those years contain a single race column and a single gender column for CEOs and for Board Chairs respectively. For easier comparability, we create new variables that collapse the individual binary columns into one race value and one gender value per role.

4.1.6.1 Aggregated Race Variable

Original Value	Description	Recode Label	Recode Value	Code as Missing ?
1	Asian/Pacific Islander	AAPI	1	No
1	Black/African American	Black	2	No
1	Latinx/Hispanic	Hisp	3	No
1	Native American/American Indian	NativeAm	4	No
1	White	White	5	No
1	Bi/Multi-racial	Bi	6	No
1	Other (please specify)	Oth	7	No
0	Checkbox Unchecked	NA	Yes

# Create New Race variables
#
# case_when() returns the code of the first condition that is TRUE, so the
# Bi indicator has to be tested before the single-race indicators. A Year 2 
# respondent with two or more race boxes checked has Bi == 1 as well as 
# several single-race indicators equal to 1; testing Bi last would code that 
# respondent as whichever single race is listed first and never as Bi.
# Rows where no indicator equals 1 are coded NA.
survey_df <- survey_df %>%
  dplyr::mutate(
    CEOrace = dplyr::case_when(
      CEOrace_Bi == 1 ~ 6,
      CEOrace_AAPI == 1 ~ 1,
      CEOrace_Black == 1 ~ 2,
      CEOrace_Hisp == 1 ~ 3,
      CEOrace_NativeAm == 1 ~ 4,
      CEOrace_White == 1 ~ 5,
      CEOrace_Oth == 1 ~ 7,
      .default = NA
    ),
    BChairrace = dplyr::case_when(
      BChairrace_Bi == 1 ~ 6,
      BChairrace_AAPI == 1 ~ 1,
      BChairrace_Black == 1 ~ 2,
      BChairrace_Hisp == 1 ~ 3,
      BChairrace_NativeAm == 1 ~ 4,
      BChairrace_White == 1 ~ 5,
      BChairrace_Oth == 1 ~ 7,
      .default = NA
  ))

# Create Survey Item
survey_df <- create_survey_item(
  survey_df, 
  c("CEOrace", "BChairrace"), 
  recode_vals = c(1, 2, 3, 4, 5, 6, 7), 
  recode_labs = c("AAPI", "Black", "Hisp", "NativeAm", "White", "Bi", "Oth"), 
  missing_vals = c()
)

4.1.6.2 Aggregated Gender Variable

Original Value	Description	Recode Label	Recode Value	Code as Missing ?
1	Man	Man	1	No
1	Woman	Woman	2	No
1	Trans	Trans	3	No
1	Gender non-conforming/Non-Binary	NB	4	No
1	Other (please specify)	Oth	5	No
0	Checkbox Unchecked	NA	Yes

# Create New Gender variables

# Code assigned to each gender indicator, listed in order of precedence.
gender_codes <- c( Man = 1, Woman = 2, Trans = 3, NB = 4, Oth = 5 )

# Collapse the 0/1 gender indicators of one role (column prefix "CEOgender" 
# or "BChairgender") into a single numeric code. Rows where no indicator 
# equals 1 stay NA.
collapse_gender <- function( prefix ){

  flag_cols <- paste0( prefix, "_", names( gender_codes ) )
  stopifnot( all( flag_cols %in% names( survey_df ) ) )

  collapsed <- rep( NA_real_, nrow( survey_df ) )

  # Walk the categories from last to first: when several boxes are checked, 
  # the category listed earliest is written last and therefore wins.
  for ( k in rev( seq_along( gender_codes ) ) ){
    checked <- which( survey_df[[ flag_cols[ k ] ]] == 1 )
    collapsed[ checked ] <- gender_codes[[ k ]]
  }

  collapsed
}

survey_df$CEOgender    <- collapse_gender( "CEOgender" )
survey_df$BChairgender <- collapse_gender( "BChairgender" )

# Create Survey Item
survey_df <- create_survey_item(
  survey_df, 
  qns          = c("CEOgender", "BChairgender"), 
  recode_vals  = unname( gender_codes ), 
  recode_labs  = c("Man", "Woman", "Trans", "NB", "Oth"), 
  missing_vals = NULL
)

4.1.7 Validate Results

We can see that the number of valid responses for the race and gender questions concerning the CEO and Board Chair has increased.

# Columns to report: every 0/1 indicator plus the four aggregated variables.
aggregated_qns  <- c( "CEOrace", "CEOgender", "BChairrace", "BChairgender" )
race_gender_qns <- c( race_gender_qns_bool, aggregated_qns )

# Build the codebook for those columns and render it as HTML.
validation_codebook <- codebook( survey_df[ race_gender_qns ] )
show_html( validation_codebook )

CEOrace_AAPI

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	467	99	.	8	67	.	6
1		‘Yes’	1	0	.	2	0	.	1
NA	M		223				32	.	3

CEOrace_Black

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	462	98	.	7	66	.	9
1		‘Yes’	6	1	.	3	0	.	9
NA	M		223				32	.	3

CEOrace_Hisp

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	464	99	.	1	67	.	1
1		‘Yes’	4	0	.	9	0	.	6
NA	M		223				32	.	3

CEOrace_NativeAm

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	464	99	.	1	67	.	1
1		‘Yes’	4	0	.	9	0	.	6
NA	M		223				32	.	3

CEOrace_White

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	12	2	.	6	1	.	7
1		‘Yes’	456	97	.	4	66	.	0
NA	M		223				32	.	3

CEOrace_Oth

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	468	100	.	0	67	.	7
1		‘Yes’	0	0	.	0	0	.	0
NA	M		223				32	.	3

CEOgender_Man

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	454	97	.	0	65	.	7
1		‘Yes’	14	3	.	0	2	.	0
NA	M		223				32	.	3

CEOgender_Woman

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	14	3	.	0	2	.	0
1		‘Yes’	454	97	.	0	65	.	7
NA	M		223				32	.	3

CEOgender_Trans

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	48	98	.	0	6	.	9
1		‘Yes’	1	2	.	0	0	.	1
NA	M		642				92	.	9

CEOgender_NB

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	467	99	.	8	67	.	6
1		‘Yes’	1	0	.	2	0	.	1
NA	M		223				32	.	3

CEOgender_Oth

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	468	100	.	0	67	.	7
1		‘Yes’	0	0	.	0	0	.	0
NA	M		223				32	.	3

BChairrace_AAPI

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	491	99	.	4	71	.	1
1		‘Yes’	3	0	.	6	0	.	4
NA	M		197				28	.	5

BChairrace_Black

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	472	95	.	5	68	.	3
1		‘Yes’	22	4	.	5	3	.	2
NA	M		197				28	.	5

BChairrace_Hisp

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	477	96	.	6	69	.	0
1		‘Yes’	17	3	.	4	2	.	5
NA	M		197				28	.	5

BChairrace_NativeAm

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	490	99	.	2	70	.	9
1		‘Yes’	4	0	.	8	0	.	6
NA	M		197				28	.	5

BChairrace_White

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	34	6	.	9	4	.	9
1		‘Yes’	460	93	.	1	66	.	6
NA	M		197				28	.	5

BChairrace_Oth

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	492	99	.	6	71	.	2
1		‘Yes’	2	0	.	4	0	.	3
NA	M		197				28	.	5

BChairgender_Man

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	410	83	.	0	59	.	3
1		‘Yes’	84	17	.	0	12	.	2
NA	M		197				28	.	5

BChairgender_Woman

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	83	16	.	8	12	.	0
1		‘Yes’	411	83	.	2	59	.	5
NA	M		197				28	.	5

BChairgender_Trans

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	164	100	.	0	23	.	7
1		‘Yes’	0	0	.	0	0	.	0
NA	M		527				76	.	3

BChairgender_NB

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	494	100	.	0	71	.	5
1		‘Yes’	0	0	.	0	0	.	0
NA	M		197				28	.	5

BChairgender_Oth

Storage mode:	double
Measurement:	nominal
Missing values:	99

Values and labels			N	Valid			Total
0		‘No’	492	99	.	6	71	.	2
1		‘Yes’	2	0	.	4	0	.	3
NA	M		197				28	.	5

CEOrace

Storage mode:	double
Measurement:	nominal

Values and labels			N	Valid			Total
1		‘AAPI’	1	0	.	2	0	.	1
2		‘Black’	6	1	.	3	0	.	9
3		‘Hisp’	3	0	.	6	0	.	4
4		‘NativeAm’	4	0	.	9	0	.	6
5		‘White’	454	97	.	0	65	.	7
6		‘Bi’	0	0	.	0	0	.	0
7		‘Oth’	0	0	.	0	0	.	0
NA	M		223				32	.	3

CEOgender

Storage mode:	double
Measurement:	nominal

Values and labels			N	Valid			Total
1		‘Man’	14	3	.	0	2	.	0
2		‘Woman’	453	96	.	8	65	.	6
3		‘Trans’	1	0	.	2	0	.	1
4		‘NB’	0	0	.	0	0	.	0
5		‘Oth’	0	0	.	0	0	.	0
NA	M		223				32	.	3

BChairrace

Storage mode:	double
Measurement:	nominal

Values and labels			N	Valid			Total
1		‘AAPI’	3	0	.	6	0	.	4
2		‘Black’	20	4	.	0	2	.	9
3		‘Hisp’	14	2	.	8	2	.	0
4		‘NativeAm’	3	0	.	6	0	.	4
5		‘White’	453	91	.	7	65	.	6
6		‘Bi’	0	0	.	0	0	.	0
7		‘Oth’	1	0	.	2	0	.	1
NA	M		197				28	.	5

BChairgender

Storage mode:	double
Measurement:	nominal

Values and labels			N	Valid			Total
1		‘Man’	84	17	.	0	12	.	2
2		‘Woman’	408	82	.	6	59	.	0
3		‘Trans’	0	0	.	0	0	.	0
4		‘NB’	0	0	.	0	0	.	0
5		‘Oth’	2	0	.	4	0	.	3
NA	M		197				28	.	5