Overview

This file loads the coded effect size data from both search strands, cleans and merges it, computes corrected correlations and effective sample sizes, and produces the descriptive statistics and figures reported below.

# For reproducibility package versions are locked to a specific date
# requireNamespace() tests for the package without require()'s side effect
# of attaching it (and without its FALSE-instead-of-error loading semantics).
if (!requireNamespace("groundhog", quietly = TRUE)) install.packages("groundhog")
groundhog::groundhog.library(c("readxl", "metaforest", "metafor", "tidyverse", "clubSandwich", "cli", "rsprite2", "esc",
                               "mice", "metacart", "gt", "gtExtras", "psych", "furrr", "progressr"), date = "2023-07-09")
# GitHub package ("user/repo" form)
groundhog::groundhog.library(c("lukaswallrich/timesaveR"), date = "2023-07-09")

# Need more recent version of patchwork due to faceting bug
groundhog::groundhog.library(c("sf", "rworldmap", "numform", "patchwork"), date = "2023-07-09")

source("helpers/helpers.R")
source("helpers/equivalence_testing.R")

# Read M/SD as character to retain trailing 0s for GRIM/GRIMMER
# Column type codes: "?" = guess, "_" = skip, "c" = character
col_types_en <- c(rep("?", 56), rep("_", 8))
col_types_en[c(3, 33:35, 42:45)] <- "c"
col_types_en <- paste0(col_types_en, collapse = "")

# Public sheet, so no Google authentication is needed
googlesheets4::gs4_deauth()
# English-language coding sheet; the first two rows are header scaffolding
# (skip = 2). Rows without an ID are empty/spacer rows and are dropped.
effect_sizes_en <- googlesheets4::read_sheet("https://docs.google.com/spreadsheets/d/1pEYZUZvFr8qmULT077y932BUShpJvhs-asRuyyiBKlQ/edit#gid=1628922200", 
                                             sheet = "Unified Coding",
                                             skip = 2,
                                             col_types = col_types_en,
                                              na = c("#N/A", "NA", "")) %>% 
                              filter(!is.na(ID)) %>% 
  mutate(ID = str_remove(ID, "\\.0$"))  # IDs arrive as numeric-like strings; strip trailing ".0"

# Non-English sheet has three extra columns, hence the different type string
# (same codes as above: "?" guess, "_" skip, "c" character for GRIM/GRIMMER)
col_types_nen <- c(rep("?", 59), rep("_", 8))
col_types_nen[c(36:38, 45:48)] <- "c"
col_types_nen <- paste0(col_types_nen, collapse = "")

# Non-English coding sheet; additionally drops rows explicitly marked as excluded
effect_sizes_n_en <- googlesheets4::read_sheet("https://docs.google.com/spreadsheets/d/1XCjlC3u7Ws2KCjaRQ1R0VuU0mLQ5mHVipmzuGtivLCs/edit#gid=1553069535", 
                                             sheet = "Unified Coding",
                                             skip = 2,
                                             col_types = col_types_nen,
                                              na = c("#N/A", "NA", "")) %>% 
                              filter(!is.na(ID)) %>%  filter(is.na(Excluded) | Excluded != "yes") %>% 
    mutate(ID = str_remove(ID, "\\.0$"))

# Save data to record version used
qs::qsave(effect_sizes_en, "data/effect_sizes_en.qs")
qs::qsave(effect_sizes_n_en, "data/effect_sizes_n_en.qs")

# Rename variables
# Rename variables
# Maps new (short) names on the LHS to original sheet headers on the RHS.
# Entries named NULL (R's grammar allows the NULL constant as an argument
# name; the resulting element name is the string "NULL") mark columns to be
# dropped when this vector is filtered and passed to select() further below.
# "...NN" suffixes stem from the reader de-duplicating repeated column headers.
rename_vec_en <- c(
  NULL = "double coded",
  id = "ID",
  effect_id = "effect_id",
  title = "Title",
  author_year = "Author (Year)",
  NULL = "File", # Only contains "PDF", not the link
  file = "File URL",
  year = "Year",
  NULL = "Coder",
  NULL = "Status",
  NULL = "Date coded",
  study = "Study",
  sample = "Sample",
  art_focus = "Article focus",
  pub_status = "Publication status",
  nonlin_rel = "Non-linear relationship",
  gen_notes =  "General Notes",
  design = "Design",
  setting = "Setting",
  ind_sector = "Industry/sector",
  team_function = "Function",
  country = "Country",
  n_teams = "N teams",
  n_obs = "N obs",
  stud_sample = "Student sample",
  tmt = "TMT",
  year_coll = "Year collected (if reported)",
  domain = "Domain",
  sub_dom = "Sub-domain",
  div_specific = "Specify",
  meas_type = "Measure type",
  # Diversity measure characteristics (columns 31-36 in the sheet)
  items_div = "Items...31",
  opts_div = "Options...32",
  m_div = "M...33",
  sd_div = "SD...34",
  reliab_div = "Reliability Cronbach's alpha (or note)...35",
  notes_div = "Notes...36",
  # Performance measure characteristics (columns 40-45 in the sheet)
  name_perf = "Name",
  criterion = "Criterion",
  rater = "Rater",
  items_perf = "Items...40",
  opts_perf = "Options...41",
  m_perf = "M...42",
  sd_perf = "SD...43",
  reliab_perf = "Reliability Cronbach's alpha (or note)...44",
  notes_perf = "Notes...45",
  # Reported effect size statistics
  r = "r",
  d = "d",
  other = "other",
  stats_notes = "Statistics Notes",
  # Registered moderators
  interdep = "Interdependence",
  complexity = "Complexity",
  longevity = "Longevity",
  virtuality = "Virtuality",
  auth_diff = "Authority differentiation",
  div_climate = "Diversity climate",
  psych_safe = "Psych safety"
)

# Same mapping for the non-English sheet: column positions and some names
# differ (extra Language column, separate "Specify" fields, shifted "...NN"
# suffixes). NULL-named entries are dropped by the select() further below.
rename_vec_n_en <- c(
  id = "ID",
  effect_id = "rowid",
  NULL = "Title",
  NULL = "File Or", 
  NULL = "File En",
  file = "File Or URL", # Contains original URL
  year = "Year",
  author_year = "Author (Year)",
  NULL = "Coder",
  NULL = "Status",
  NULL = "Date coded",
  study = "Study",
  sample = "Sample",
  language = "Language",
  art_focus = "Article focus",
  pub_status = "Publication status",
  nonlin_rel = "Non-linear relationship",
  gen_notes = "General Notes",
  design = "Design",
  setting = "Setting",
  ind_sector = "Industry/sector", 
  team_function = "Function",
  country = "Country",
  n_teams = "N teams",
  n_obs = "N obs", 
  stud_sample = "Student sample",
  tmt = "TMT",
  year_coll = "Year collected (if reported)",
  domain = "Domain",
  sub_dom = "Sub-domain",
  div_dom_specific = "Specify...31",  
  meas_type = "Measure type",
  div_specific = "Specify...33", 
  items_div = "Items...34",  
  options_div = "Options...35",  
  m_div = "M...36",  
  sd_div = "SD...37", 
  reliab_div = "Reliability Cronbach's alpha (or note)...38",  
  notes_div = "Notes...39",  
  name_perf = "Name",
  criterion = "Criterion",
  rater = "Rater",
  items_perf = "Items...43",  
  options_perf = "Options...44",  
  m_perf = "M...45",  
  sd_perf = "SD...46",  
  reliab_perf = "Reliability Cronbach's alpha (or note)...47",  
  notes_perf = "Notes...48",
  r = "r",
  d = "d",
  other = "other",
  stats_notes = "Statistics Notes",
  interdep = "Interdependence",
  complexity = "Complexity",
  longevity = "Longevity",
  virtuality = "Virtuality",
  auth_differentiation = "Authority differentiation",
  div_climate = "Diversity climate",
  psych_safe = "Psych safety"
)

# Normalise column names before matching against the rename vectors.
# str_squish() trims leading/trailing whitespace AND collapses every run of
# internal whitespace (including newlines) to a single space, so the previous
# separate str_replace("\n", " ") step was redundant and has been dropped.
names(effect_sizes_en) <- names(effect_sizes_en) %>% str_squish()
names(effect_sizes_n_en) <- names(effect_sizes_n_en) %>% str_squish()
# select() with a named character vector both subsets and renames in one go;
# elements named "NULL" are the columns flagged for dropping above.
effect_sizes_en <- effect_sizes_en %>% select(rename_vec_en[!names(rename_vec_en)=="NULL"])
effect_sizes_n_en <- effect_sizes_n_en %>% select(rename_vec_n_en[!names(rename_vec_n_en)=="NULL"])

# Build the English-language analysis dataset:
# - articlestudy: unique key per article/study/sample combination
# - effect_id: sequential numbering of effects within each sample
# - author_year: disambiguated with letter suffixes (a, b, ...) whenever the
#   same label covers more than one article id
dataset <- effect_sizes_en %>%
  mutate(
    articlestudy = paste(id, study, sample, sep = "/"),
    language = "english"
  ) %>%
  group_by(articlestudy) %>%
  mutate(effect_id = row_number()) %>%
  ungroup() %>%
  group_by(author_year) %>%
  mutate(
    id_order = match(id, unique(id)),
    author_year = ifelse(max(id_order) > 1,
                         paste0(author_year, letters[id_order]),
                         author_year)
  ) %>%
  ungroup() %>%
  select(-id_order)

# Same construction for the non-English data; the language column already
# comes coded from the sheet, so no constant is added here.
dataset_nen <- effect_sizes_n_en %>%
  mutate(articlestudy = paste(id, study, sample, sep = "/")) %>%
  group_by(articlestudy) %>%
  mutate(effect_id = row_number()) %>%
  ungroup() %>%
  group_by(author_year) %>%
  mutate(
    id_order = match(id, unique(id)),
    author_year = ifelse(max(id_order) > 1,
                         paste0(author_year, letters[id_order]),
                         author_year)
  ) %>%
  ungroup() %>%
  select(-id_order)

# Fix data types: harmonise reliab_perf to character before binding the two
# strands, coerce n_teams to numeric, and set "Demographic" as the reference
# level of the (hyphen-normalised) domain factor.
dataset <- dataset %>%
  bind_rows(dataset_nen %>% mutate(reliab_perf = as.character(reliab_perf))) %>%
  mutate(
    n_teams = as.numeric(n_teams),
    domain = domain %>%
      str_replace("_", "-") %>%
      as.factor() %>%
      relevel(ref = "Demographic")
  )
# Remove inconsistencies in coded labels
# (replace() has the same subassignment semantics as dataset$x[...] <- value)
dataset <- dataset %>%
  mutate(
    pub_status = replace(pub_status, pub_status == "MA Dissertation", "Masters Dissertation"),
    language = replace(language, language == "Chinese", "chinese")
  )

Estimate ‘corrected’ correlations and standard errors

Effective sample sizes

The largest sample sizes were associated with studies that sampled outputs produced by teams (e.g., patents, Wikipedia articles and academic publications) rather than the teams themselves. Several such outputs may be created by the same team, and teams frequently overlap. Nevertheless, they provide relevant data based on large samples, so we wanted to include them. As there is no systematic way to estimate how many independent teams these represent, the sample sizes were winsorized to the largest sample size representing independent teams. This affected 12 studies.

# Keep the as-coded team count for reference before winsorizing
dataset$n_teams_coded <- dataset$n_teams

# Studies of independent teams (everything not flagged "OUTPUTS")
other_dataset <-  dataset %>% filter(is.na(n_obs) | n_obs != "OUTPUTS") 
# na.rm = TRUE guards against missing team counts; without it a single NA
# would make max_sample NA and the cap below would wipe out n_teams entirely.
max_sample <- max(other_dataset$n_teams, na.rm = TRUE)

# Output-based samples (patents, articles, ...) are capped at the largest
# independent-team sample. pmin() is vectorised, so no rowwise() loop needed.
adj_dataset <-  dataset %>% filter(n_obs == "OUTPUTS") %>% 
  mutate(n_teams = pmin(n_teams, max_sample))

dataset <- bind_rows(other_dataset, adj_dataset)

Many more studies relied on multiple observations of the same teams, e.g., seasons for sports teams and years for firms. These observations are evidently dependent, so that the number of observations cannot be treated as the effective sample size. Meta-analyses in business psychology rarely address this issue explicitly, and sometimes appear to treat observations from panel data as independent - yet that can give excessive weight to studies based on a low number of independent clusters. Instead, this needs to be corrected for the auto-correlation. This is rarely reported in the papers considered, so had to be assumed based on available data.

For sports teams, we identified two sources that reported year-on-year correlations, reporting r = .72 for the season-to-season win percentage in the NBA (Landis, 2001), and r = .64 for the season-to-season goal difference in the German Bundesliga (calculated based on Ben-Ner et al., 2017). Therefore, we assumed a year-on-year correlation of .7 for repeated observations of sports teams.

For year-on-year company performance, we consistently identified lower correlations, specifically: - Return on assets (logged): .43 (Hambrick et al., 2014) - Return on assets (Rickley et al., 2021): .54 Therefore, we assumed a year-on-year correlation of .5 for repeated measures of corporate and team performance.

However, for specific operational measures, the correlation is likely to be substantially different and likely higher. For instance, Pegels et al (2000) report that airline’s load factor had a year-on-year correlation of .96. Similarly, Zouaghi et al. (2020) measured R&D performance every year based on whether an innovative product had been introduced in the previous 3 years, which will evidently correlate strongly due to the measurement choice, regardless of underlying autocorrelation. Therefore, we only considered the number of independent observations in such cases.

After that, we calculated the effective sample size per observation using the common formula provided by the Stan Development Team (2024), where N refers to the number of observations per observed team, r to the correlation between adjacent time-points and N* to the effective sample size per team.

\[ N^* = \max\left(\frac{N}{1 + 2 \times r},\ 1\right) \]

# Pass-through set: rows with no observation count, explicitly excluded or
# output-based counts, or team functions other than management/sports
other_dataset <- dataset %>% filter(is.na(n_obs) | str_detect(n_obs, "EXCL|OUTPUTS") | !team_function %in% c("Management", "Sports players"))

# Exact complement of the filter above: repeated observations of management or
# sports teams. Apply N* = N / (1 + 2r) per team with the assumed year-on-year
# correlations (r = .5 management, r = .7 sports); pmax(1, ...) ensures the
# effective N never drops below the number of independent teams.
adj_dataset <- dataset %>% filter(!is.na(n_obs), !str_detect(n_obs, "EXCL|OUTPUTS"), team_function %in% c("Management", "Sports players")) %>% 
  mutate(scale = case_when(
      team_function == "Management" ~ 1 / (1 + 2 * 0.5),
      team_function == "Sports players" ~ 1 / (1 + 2 * 0.7)
      ),
         # observations per team, parsed from the (free-text) n_obs field
         n_each = as.numeric(gsub("[^0-9.]+", "", n_obs)) / n_teams,
         n_teams = n_teams * pmax(1, n_each * scale)) %>% 
  select(-scale, -n_each)
                  
dataset <- bind_rows(other_dataset, adj_dataset)

Correction for attenuation

# Calculate r from other measures
# Formulae taken from Polanin & Snilsveit (Campbell SR, DOI: 10.4073/cmpn.2016.3)

# Convert Cohen's d to a correlation.
# d: standardised mean difference; n1/n2: group sizes; n: total N when only
# the total is known (groups then assumed equal, as throughout this MA).
d_to_r <- function(d, n1 = NULL, n2 = NULL, n = NULL) {
  if (is.null(n1) && is.null(n2)) {
    n1 <- n / 2
    n2 <- n / 2
  }
  correction <- (n1 + n2)^2 / (n1 * n2)
  d / sqrt(d^2 + correction)
}

# Convert an odds ratio to r: log odds -> Cohen's d via d = ln(OR) * sqrt(3) / pi,
# then d -> r with the same n-based correction term as d_to_r()
# (Polanin & Snilsveit, Campbell SR, DOI: 10.4073/cmpn.2016.3).
OR_to_r <- function(OR, n1 = NULL, n2 = NULL, n = NULL) {
  # If only n is provided, equal group sizes are assumed (done throughout this MA)
  if (is.null(n1) && is.null(n2)) {n1 = n/2; n2 = n/2}
  a <- (n1 + n2)^2 / (n1 * n2)
  d <- log(OR) * (sqrt(3) / pi)
  # BUG FIX: the d term in the denominator must be squared (r = d / sqrt(d^2 + a));
  # the previous code computed sqrt(d + a), understating the denominator.
  r <- d / sqrt(d^2 + a)
  return(r)
}

# Derive the reported correlation for one effect-size row, preferring a
# directly reported r, then d, then an "other" statistic (currently only ORs).
# Returns NA_real_ (with a message for unparseable "other" entries) otherwise.
calculate_es <- function(row) {
  if (!is.na(row$r)) return(as.numeric(row$r))
  if (!is.na(row$d)) return(d_to_r(as.numeric(row$d), n = row$n_teams))
  if (!is.na(row$other)) {
    if (str_detect(row$other, "OR")) {
      # "\\d*\\.?\\d+" also matches integer ORs (e.g. "OR = 2"), which the
      # previous pattern ("\\d*\\.\\d+") silently turned into NA
      return(OR_to_r(as.numeric(str_extract(row$other, "\\d*\\.?\\d+")), n = row$n_teams))
    } else {
      message("Challenge in ", row$id, " row: ", row$effect_id)
      return(NA)
    }
  }
  # BUG FIX: previously the function fell through here and returned invisible
  # NULL, which breaks the rowwise mutate() below; return an explicit NA.
  NA_real_
}

# Apply row by row: pick(everything()) hands each row to calculate_es() as a
# one-row data frame so the helper can dispatch on whichever statistic is coded
dataset <- dataset %>% rowwise() %>% 
  mutate(r_rep = calculate_es(pick(everything()))) %>% 
  ungroup()

# Transform special reliabilities
# For 2-item scale, Cronbachs alpha is (2 * r) / (1 + r), so that can be directly converted
# Other measures (IRR, CR etc) can be used as is, as they are on the same scale as alpha
# Expect 'NAs introduced by coercion' warnings from case_when (https://github.com/tidyverse/dplyr/issues/6250)

# For each reliability column: keep the raw string (*_reported), classify the
# reported statistic (*_type) and extract an alpha-equivalent number (*_conv).
dataset <- dataset %>%
  mutate(
    reliab_div_reported = reliab_div,
    reliab_perf_reported = reliab_perf,
    across(
      .cols = c(reliab_div, reliab_perf),
      .fns = list(
        # Classify by string format; branch order matters (first match wins)
        type = ~case_when(
          is.na(.) ~ NA_character_,
          str_detect(., "^[0-9.]+$") ~ "cronbach",
          str_detect(., "CR") ~ "comp_reliab_other",
          str_detect(., "r =") ~ "r",
          str_detect(., "ICC|interrater|IRR") ~ "ICC_interrater",
          str_detect(., "[0-9]") ~ "other",
          TRUE ~ "other"
        ),
        # Numeric conversion; an inter-item r becomes alpha via the
        # 2-item Spearman-Brown formula (2r / (1 + r))
        conv = ~case_when(
          str_detect(., "^[0-9.]+$") ~ as.numeric(.),
          str_detect(., "CR") ~ as.numeric(str_extract(., "[0-9.]+")),
          str_detect(., "r =") ~ {
            r_value <- as.numeric(str_extract(., "(?<=r = ?)[0-9.]+"))
            (2 * r_value) / (1 + r_value)
          },
          str_detect(., "ICC|interrater|IRR") ~ as.numeric(str_extract(., "[0-9.]+")),
          str_detect(., "[0-9]") ~ as.numeric(str_extract(., "[0-9.]+")),
          TRUE ~ NA_real_
        )
      )
    )
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(...)`.
## Caused by warning:
## ! NAs introduced by coercion
# Bootstrap missing reliabilities
# Tercile cut points over the distinct multi-item scale lengths (single-item
# scales and missing lengths are excluded - they get no adjustment below)
div_scale_terciles <- quantile(dataset$items_div %>% unique() %>% setdiff(c(1, NA)), c(.33, .66), na.rm = TRUE)
perf_scale_terciles <- quantile(dataset$items_perf %>% unique() %>% setdiff(c(1, NA)), c(.33, .66), na.rm = TRUE)

# Refer to columns directly inside mutate(); the previous `dataset$items_div`
# bypassed dplyr's data masking (same result here, but fragile if the pipeline
# above ever filters or reorders before this step).
dataset <- dataset %>%
  mutate(div_tercile = cut(items_div, breaks = c(1, div_scale_terciles, Inf), labels = c(1, 2, 3), include.lowest = FALSE),
         perf_tercile = cut(items_perf, breaks = c(1, perf_scale_terciles, Inf), labels = c(1, 2, 3), include.lowest = FALSE)
) 

# Pools of observed (converted) reliabilities per tercile, sampled from below
div_reliabilities <-  split(dataset$reliab_div_conv, dataset$div_tercile) %>% map(na.omit)
perf_reliabilities <- split(dataset$reliab_perf_conv, dataset$perf_tercile) %>% map(na.omit)

# Fill in one reliability value:
# - keep the observed/converted reliability when present,
# - use 1 (no adjustment) when the scale length is unknown or a single item,
# - otherwise draw at random from the observed reliabilities of scales in the
#   same length tercile ("div" or "perf" pool defined above).
sample_reliab <- function(x, items, type, tercile) {
  if (!is.na(x)) return(x)
  if (is.na(items)) {return(1)} # When both reliability and number of items are missing, no basis for adjustment
  if (items == 1) return(1)
  pool <- switch(type,
                 div = div_reliabilities[[as.character(tercile)]],
                 perf = perf_reliabilities[[as.character(tercile)]],
                 stop("Flow logic error in ", type))
  # BUG FIX: sample(pool, 1) misbehaves when length(pool) == 1 - base R's
  # sample() then treats the single value as the upper bound of 1:n.
  # Indexing with sample.int() is safe for pools of any length.
  pool[sample.int(length(pool), 1)]
}

# Seed fixed so the bootstrap draws of missing reliabilities are reproducible
set.seed(1347)

dataset <- dataset %>% rowwise() %>% 
  mutate(
  # Fill missing reliabilities by sampling from the matching tercile pool
  reliab_div = sample_reliab(reliab_div_conv, 
                             items_div, "div", div_tercile),
  reliab_perf = sample_reliab(reliab_perf_conv, 
                             items_perf, "perf", perf_tercile)
  ) %>% ungroup() %>% 
  # Spearman's correction for attenuation: r_adj = r / sqrt(rxx * ryy).
  # BUG FIX: previously sqrt(reliab_div) * sqrt(reliab_div) was used, i.e.
  # r was divided by the diversity reliability alone and the performance
  # reliability (computed above) was never applied.
  mutate(r_adj = r_rep / (sqrt(reliab_div) * sqrt(reliab_perf)),
         se =  sqrt((1 - r_rep^2) / (n_teams - 2)),
         # Scale the sampling variance by the same factor applied to r
         r_scale = ifelse(r_adj == 0, 1, r_adj/r_rep),
         var_adj = se ^ 2 * (r_scale)^2) %>% 
  select(-r_scale)

# Cap adjusted rs to +/- 1 ... more extreme values only arise in smallest samples due to sampling error,
# and maintaining them only increases overall error (can arise when large correlations are paired with low
# reliabilities in bootstrapping)

dataset <- dataset %>% mutate(r_adj = pmax(pmin(r_adj, 1), -1))

(Expect a warning about NAs introduced by coercion from the `case_when()` reliability conversion above.)

Add moderators

# Download the Hofstede culture dimension scores once and cache them locally
if (!file.exists("data/hofstede.csv")) {
  download.file("https://geerthofstede.com/wp-content/uploads/2016/08/6-dimensions-for-website-2015-08-16.csv", "data/hofstede.csv")
}

# Keep only power distance and collectivism (inverted individualism score)
hofstede <- read_delim("data/hofstede.csv", delim = ";", na = "#NULL!", show_col_types = FALSE) %>% 
  transmute(country, power_distance = pdi, collectivism = 100 - idv)

# Some names in Hofstede differ
# Data for Sri Lanka & Kazakhstan not available, so remains missing
# Lookup: country name as coded in the dataset -> Hofstede country/region label
hofstede_map <- c(
  "United States" = "U.S.A.", 
  "Hong Kong SAR China" = "Hong Kong", 
  "South Korea" = "Korea South", 
  "United Kingdom" = "Great Britain",
  "Cameroon" = "Africa West",  # where only regional data is available, used that
  "United Arab Emirates" = "Arab countries",  
  "Kuwait" = "Arab countries"
)

# Recode to Hofstede labels (countries not in the map pass through unchanged),
# join the two culture scores, then drop the temporary matching key
dataset <- dataset %>% 
  mutate(hofstede_match = ifelse(country %in% names(hofstede_map), 
  hofstede_map[country %>% as.character()], country %>% as.character()) %>%  factor()) %>% 
  left_join(hofstede, by = c("hofstede_match" = "country")) %>% 
  select(-hofstede_match)

# Google Scholar citation counts for both search strands; coalesce() merges
# the two differently named count columns across the files
# (assumes columns `citation_count` and `citations` - TODO confirm in the xlsx)
citation_counts <- bind_rows(read_excel("data/citation_data_en.xlsx") %>% distinct(),
                             read_excel("data/citation_data_nen.xlsx") %>% distinct()) %>% 
  mutate(citation_count = coalesce(citation_count, citations))
                             

dataset <- dataset %>% 
  left_join(citation_counts %>% select(id = ID, citation_count), by = "id")

# NAs in citation count indicate that these entries could not be found on Google Scholar - 
# thus they have 0 Google Scholar citations.
dataset <- dataset %>% 
  mutate(citation_count = replace_na(citation_count, 0))

# read_sheet can deliver year_coll as a list column when cell types are mixed;
# null_to_NA() (from helpers.R) flattens it to an atomic vector first
if (is.list(dataset$year_coll)) {
  dataset <- dataset %>%
      mutate(year_coll = null_to_NA(year_coll))
} 
  # Ranges like "2001-2003" (hyphen or en dash) become their midpoint;
  # single years convert directly. year_merged falls back to the
  # publication year when no collection year was reported.
  dataset <- dataset %>%
      mutate(year_coll_mean = ifelse(str_detect(year_coll, "–|-"),
                                sapply(str_split(year_coll, "–|-"), 
                                       function(x) mean(as.numeric(x))),
                                as.numeric(year_coll)),
             year_merged = coalesce(year_coll_mean,
                                year))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `year_coll_mean = ifelse(...)`.
## Caused by warning in `ifelse()`:
## ! NAs introduced by coercion

(Expect warning about conversion of years due to use of ifelse.)

Save data

# Persist the fully prepared dataset for the downstream analysis scripts
write_rds(dataset, "data/full_dataset.rds")

Sample description

Our dataset consisted of 2638 effect sizes from 646 samples in 615 publications. Samples were located in 43 countries. The studies were published between 1966 and 2023.

Global distribution

# create_world_map() helper lives here
source("helpers/world_map.R")

# World map of team counts; multi-country and unreported-country samples are
# handled separately below. Country labels are aligned to the map data.
teams <- dataset %>% 
  filter(!is.na(country), !country == "Multiple") %>% 
  mutate(country = case_when(
    country == "United States" ~ "United States of America",
    country == "Hong Kong SAR China" ~ "Hong Kong",
    TRUE ~ country
  )) %>% 
  group_by(articlestudy, country) %>% 
  # NB: Underestimates sample size where only sub-groups are compared -
  # rare case, and fixing it precisely would require going back to original papers,
  # so we accept the slight underestimation for now
  summarise(n_teams = max(n_teams), .groups = "drop") %>% 
  {
    create_world_map(.$country, .$n_teams, "teams", scale_start = 10)
  }

# One row per sample and country (studies = 1 acts as a count weight)
studies <- dataset %>% 
  filter(!is.na(country), !country == "Multiple") %>% 
  mutate(country = case_when(
    country == "United States" ~ "United States of America",
    country == "Hong Kong SAR China" ~ "Hong Kong",
    TRUE ~ country
  )) %>% 
  group_by(articlestudy, country) %>% 
  summarise(studies = 1, .groups = "drop") 

# UN sub-region per country; Taiwan is assigned manually because countrycode
# cannot match it unambiguously (see warning below)
studies <- studies %>% ungroup() %>% mutate(region = case_when(
  country != "Taiwan" ~ countrycode::countrycode(country, origin = "country.name", destination = "un.regionsub.name"),
  country == "Taiwan" ~ "Eastern Asia"))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `region = case_when(...)`.
## Caused by warning:
## ! Some values were not matched unambiguously: Taiwan
# Share of samples per UN sub-region
studies %>% 
  count(region) %>% 
  mutate(region_share = n / sum(n)) %>% 
  arrange(desc(region_share))
## # A tibble: 12 × 3
##    region                              n region_share
##    <chr>                           <int>        <dbl>
##  1 Eastern Asia                      205      0.370  
##  2 Northern America                  180      0.325  
##  3 Western Europe                     54      0.0975 
##  4 Southern Europe                    35      0.0632 
##  5 Northern Europe                    23      0.0415 
##  6 Western Asia                       19      0.0343 
##  7 Australia and New Zealand          14      0.0253 
##  8 Southern Asia                      11      0.0199 
##  9 South-eastern Asia                  7      0.0126 
## 10 Latin America and the Caribbean     3      0.00542
## 11 Sub-Saharan Africa                  2      0.00361
## 12 Central Asia                        1      0.00181
# Top 10 countries by share of samples
studies %>% 
  count(country) %>% 
  mutate(country_share = n / sum(n)) %>% 
  arrange(desc(country_share)) %>% 
  head(n = 10)
## # A tibble: 10 × 3
##    country                      n country_share
##    <chr>                    <int>         <dbl>
##  1 United States of America   177        0.319 
##  2 China                      119        0.215 
##  3 Taiwan                      43        0.0776
##  4 South Korea                 35        0.0632
##  5 Germany                     24        0.0433
##  6 Netherlands                 22        0.0397
##  7 Spain                       17        0.0307
##  8 Australia                   14        0.0253
##  9 United Kingdom              13        0.0235
## 10 Israel                      12        0.0217
# World map of study counts (same helper as the teams map above)
studies_plot <- studies %>% 
  {
    create_world_map(.$country, .$studies, "studies")
  }

# Samples excluded from the maps: country unknown vs. multi-country samples,
# aggregated to team and study totals for the caption below
omitted <- dataset %>% 
  filter(is.na(country) | country == "Multiple") %>% 
  mutate(status = ifelse(is.na(country), "unknown", "multiple")) %>% 
  group_by(status, articlestudy) %>% 
  summarise(n_teams = max(n_teams), n_studies = 1, .groups = "drop_last") %>% 
  summarise(n_teams = sum(n_teams), n_studies = sum(n_studies), .groups = "drop")

# Stack both maps (patchwork) with an exclusion caption and panel tags
p <- teams / studies_plot + plot_annotation(
  caption = glue("The maps exclude {omitted[omitted$status == 'multiple', 'n_studies']} studies ({scales::comma(round(omitted[omitted$status == 'multiple', 'n_teams'])[[1]])} teams) covering multiple countries, and {omitted[omitted$status == 'unknown', 'n_studies']} studies ({scales::comma(round(omitted[omitted$status == 'unknown', 'n_teams'])[[1]])} teams) where the country was not reported.") %>% str_wrap(60),
  tag_levels = "A")
          
ggsave("figures/world_map.png", p, width = 18, units = "cm")
## Saving 18 x 25.4 cm image
p

Distribution over time

# Per-year counts for the timeline plots: samples (n = 1 per sample) and
# effect sizes (n = n()), each once per domain and once as a "Total" row.
# A sample spanning several domains counts in each domain panel.
studies <- dataset %>%
  group_by(year, domain, articlestudy) %>%
  summarise(n = 1, .groups = "drop") %>%
  mutate(type = "Samples")
studies_tot <- dataset %>%
  group_by(year, articlestudy) %>%
  summarise(n = 1, .groups = "drop") %>%
  mutate(type = "Samples", domain = "Total")
effects <- dataset %>%
  group_by(year, domain, articlestudy) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(type = "Effects")
effects_tot <- dataset %>%
  group_by(year, articlestudy) %>%
  summarise(n = n(), .groups = "drop") %>%
  mutate(type = "Effects", domain = "Total")

# Stacked yearly bars, faceted by domain (rows) and count type (columns)
stud_plot <- bind_rows(studies, studies_tot) %>% 
  ggplot(aes(x = year, y = n, fill = domain)) +
  geom_col() +
  facet_grid(domain ~ type, switch = "y") + 
  labs(caption = "NB: The number of total samples does not equal the sum of domain-specific studies, as many samples encompass multiple domains." %>% str_wrap(50))
 

eff_plot <- bind_rows(effects, effects_tot) %>% 
  ggplot(aes(x = year, y = n, fill = domain)) +
  geom_col() +
  facet_grid(domain ~ type, switch = "y") +
  labs(caption = "NB: The outlier number of Cognitive effects in 2022 is due to Qamar & Malik (2022) who assessed  personality traits across project stages (70 effect sizes)."  %>% str_wrap(50))

# `&` applies theme/labs to all patchwork panels at once
p <- stud_plot + eff_plot & 
  jtools::theme_apa() & 
  theme(legend.position = "none", strip.placement = "outside", plot.caption = element_text(hjust = 0)) &
   labs(x = "", y = "")


ggsave("figures/timeline.png", p, width = 18, units = "cm")
## Saving 18 x 17.8 cm image
p

Distribution over sub-domains

# Code adapted from https://github.com/nilsreimer/ironic-effects-meta-analysis
# Thanks to Nils Reimer!

# Count distinct samples per domain and sub-domain, compute within-domain
# shares, and order sub-domains by share with "Other" pinned as first level
fig_dat <- dataset %>% 
  group_by(domain, sub_dom, articlestudy) %>% 
  summarise(n = 1, .groups = "drop_last") %>% 
  summarise(tot = n(), .groups = "drop_last") %>%
  mutate(share = tot/sum(tot), sub_dom = str_to_title(as.character(sub_dom))) %>% 
  ungroup() %>% 
    arrange(share) %>% 
    filter(!is.na(sub_dom)) %>% 
    mutate(
      sub_dom = if_else(sub_dom == "Ethnicity", "Race/ethnicity", sub_dom),
      group = factor(sub_dom, levels = c("Other", sub_dom[sub_dom != "Other"] %>% unique()))
    ) 
 
# Summary table of the five largest sub-domains per domain
fig_dat %>% group_by(domain) %>% arrange(-share) %>% slice_head(n = 5) %>% select(domain, sub_dom, share) %>% gt() %>% fmt_percent(share) %>% tab_header(title = "Top 5 sub-domains by domain") %>% cols_label(sub_dom = "Sub-domain", share = "Share")
Top 5 sub-domains by domain
Sub-domain Share
Demographic
Gender 38.32%
Age 35.27%
Nationality 10.84%
Race/ethnicity 8.85%
Other 3.66%
Cognitive
Educational Level 32.34%
Degree 28.44%
Other 19.16%
Personality 10.18%
Values 6.89%
Job-related
Tenure 42.13%
Function 40.80%
Other 16.41%
# One data frame per domain, so each gets its own bar chart below
fig_dat <- split(fig_dat, fig_dat$domain)

# Horizontal bar chart of sub-domain shares for one domain.
# data: one element of fig_dat (columns share, group, tot);
# subtitle: panel label. "Other" bars are greyed out; count labels sit inside
# wide bars (white) and outside narrow ones (black).
create_plot <- function(data, subtitle) {
  ggplot(data, aes(x = share, y = group)) +
    geom_col(
      aes(fill = if_else(group == "Other", "grey82", "black")),
      width = 0.8
    ) +
    geom_text(
      aes(
        label = tot,
        colour = if_else(share < 0.10, "black", "white"),
        hjust = if_else(share < 0.10, -0.25, 1.25)
      ),
      size = 9/.pt
    ) +
    scale_x_continuous(
      labels = scales::percent_format(accuracy = 10),
      expand = c(0, 0)
    ) +
    scale_colour_identity() +
    scale_fill_identity() +
    labs(subtitle = subtitle) +
    theme_minimal(base_size = 10) +
    theme(
      legend.position = "none",
      plot.title = element_text(colour = "black", face = "bold"),
      axis.text = element_text(colour = "black"),
      axis.title = element_blank(),
      panel.grid = element_blank(),
      panel.grid.major.x = element_line(colour = "grey92")
    )
}

# One panel per domain, combined side by side via patchwork
dem <- create_plot(fig_dat$Demographic, "Demographic")
cog <- create_plot(fig_dat$Cognitive, "Cognitive")
job <- create_plot(fig_dat$`Job-related`, "Job-related")

# Caption reports the share of combined-sub-domain measures not shown above
p <- dem + cog + job + 
plot_annotation(caption = glue::glue("NB: Share missing to 100% are measures that combine sub-domains\n
({fmt_pct(1 - sum(fig_dat$Demographic$share))} for Demographic, very rare in others.)"))

ggsave("figures/sub-domains.png", p, width = 14, height = 10, units = "cm")
 
p

Distribution over sectors & functions

# Distinct samples per industry sector, rarest sectors lumped into "Other"
ind_sector <- dataset %>%
  select(articlestudy, ind_sector, stud_sample, setting) %>%
  distinct() %>%
  mutate(ind_sector = fct_lump_n(ind_sector, 10) %>% fct_recode("Mixed" = "Multiple/mixed")) %>%
  count(ind_sector, sort = TRUE)

# Percent shares, with factor levels fixed in descending-count order
df <- ind_sector %>%
  filter(!is.na(ind_sector)) %>%
  rename(group = ind_sector, tot = n) %>%
  mutate(share = tot / sum(tot) * 100, group = fct_inorder(group) %>% fct_rev()) %>%
  rename(sector = group, percentage = share)

# Calculate a truncated percentage for plotting
# (bars longer than 25% are cut off and flagged with an arrow in the plot)
df$truncated_percentage <- ifelse(df$percentage > 25, 25, df$percentage)
df$truncated <- df$percentage > 25

# Plot with axis break
# Bars are truncated at 25%; truncated bars get a dark-red arrow plus the
# true percentage printed beyond the break, and the break itself is drawn
# as two short diagonal segments (annotate at the end).
p_sec <- df %>%
  ggplot(aes(y = sector, x = truncated_percentage, fill = if_else(sector == "Other", "grey82", "black"))) +
  geom_bar(stat = "identity", width = 0.8) +
  scale_x_continuous(
    limits = c(0, 29.9), expand = c(0, 0), breaks = seq(0, 24, 5),
    labels = scales::percent_format(accuracy = 1, scale = 1)
  ) +
  scale_fill_identity() +
  scale_colour_identity() +
  theme_minimal(base_size = 10) +
  theme(
    legend.position = "none",
    plot.title = element_text(colour = "black", face = "bold"),
    axis.text = element_text(colour = "black"),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    panel.grid.major.x = element_line(colour = "grey92")
  ) +
  # Arrow marking bars that exceed the 25% cut-off
  geom_segment(
    data = subset(df, percentage > 25),
    aes(y = sector, yend = sector, xend = 25, x = 24.2),
    arrow = arrow(type = "closed", length = unit(0.09, "inches")),
    inherit.aes = FALSE, color = "darkred", linewidth = 1.5
  ) +
  # True percentage printed past the break for truncated bars
  geom_text(
    data = subset(df, truncated),
    aes(x = 27.7, label = paste0(round(percentage), "%")),
    vjust = .3, color = "darkred", size = 9 / .pt
  ) +
  # Count labels: inside wide bars (white), outside narrow ones (black)
  geom_text(
    aes(
      label = tot,
      colour = if_else(percentage < 10, "black", "white"),
      hjust = if_else(truncated_percentage == 25, 3, if_else(truncated_percentage < 10, -0.5, 1.75))
    ),
    size = 9 / .pt
  ) +
  labs(subtitle = "Studies per industry sector") + 
  annotate("segment", x = c(21.5, 21.85), xend = c(22.5, 22.85), y = 9.5, yend = 10.5, colour = "darkred")

# Distinct samples per team function, rarest functions lumped into "Other"
team_function <- dataset %>%
  select(articlestudy, team_function, stud_sample, setting, tmt) %>%
  distinct() %>%
  mutate(team_function = fct_lump_n(team_function, 10)) 

# Report the share of management teams that are top management teams
print(glue::glue("Out of the management teams, {fmt_pct(team_function %>% filter(team_function == 'Management') %>% count(tmt) %>% mutate(share = n/sum(n)) %>% filter(tmt == 'yes') %>% pull(share))} were top management teams."))
## Out of the management teams, 89.0% were top management teams.
team_function <- team_function %>%
  count(team_function, sort = TRUE)

# Percent shares with descending-count factor order (mirrors the sector prep)
df <- team_function %>%
  filter(!is.na(team_function)) %>%
  rename(group = team_function, tot = n) %>%
  mutate(share = tot / sum(tot) * 100, group = fct_inorder(group) %>% fct_rev()) %>%
  rename(percentage = share)

# Calculate a truncated percentage for plotting
# (same 25% cut-off convention as the sector plot above)
df$truncated_percentage <- ifelse(df$percentage > 25, 25, df$percentage)
df$truncated <- df$percentage > 25

# Plot with axis break
# Mirrors p_sec: bars truncated at 25%, arrow + true percentage for truncated
# bars, axis break drawn as two diagonal segments at the end.
p_fun <- df %>%
  ggplot(aes(y = group, x = truncated_percentage, fill = if_else(group == "Other", "grey82", "black"))) +
  geom_bar(stat = "identity", width = 0.8) +
  scale_x_continuous(
    limits = c(0, 29.9), expand = c(0, 0), breaks = seq(0, 24, 5),
    labels = scales::percent_format(accuracy = 1, scale = 1)
  ) +
  scale_fill_identity() +
  scale_colour_identity() +
  theme_minimal(base_size = 10) +
  theme(
    legend.position = "none",
    plot.title = element_text(colour = "black", face = "bold"),
    axis.text = element_text(colour = "black"),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    panel.grid.major.x = element_line(colour = "grey92")
  ) +
  # Arrow marking bars that exceed the 25% cut-off
  geom_segment(
    data = subset(df, percentage > 25),
    aes(y = group, yend = group, xend = 25, x = 24.2),
    arrow = arrow(type = "closed", length = unit(0.09, "inches")),
    inherit.aes = FALSE, color = "darkred", linewidth = 1.5
  ) +
  # True percentage printed past the break for truncated bars
  geom_text(
    data = subset(df, truncated),
    aes(x = 27.7, label = paste0(round(percentage), "%")),
    vjust = .3, color = "darkred", size = 9 / .pt
  ) +
  # Count labels: inside wide bars (white), outside narrow ones (black)
  geom_text(
    aes(
      label = tot,
      colour = if_else(truncated_percentage < 10, "black", "white"),
      hjust = if_else(truncated_percentage == 25, 3, if_else(truncated_percentage < 10, -0.5, 1.75))
    ),
    size = 9 / .pt
  ) +
  labs(subtitle = "Studies per function") + 
  annotate("segment", x = c(21.5, 21.85), xend = c(22.5, 22.85), y = 9.5, yend = 10.5, colour = "darkred")



# Caption: shares of distinct samples with missing sector / function codes.
# BUG FIX: the sector share previously selected (articlestudy, team_function)
# but then summarised is.na(ind_sector); since that column was not selected,
# R silently resolved `ind_sector` to the unrelated count table of the same
# name in the environment. Select the ind_sector column itself instead.
p <- p_sec + p_fun + plot_annotation(caption = glue::glue("NB: Share missing to 100% are missing values,\nlargely from business simulations and experiments\n({fmt_pct(dataset %>% select(articlestudy, ind_sector) %>% 
  distinct() %>% summarise(mean(is.na(ind_sector))) %>% pull())} for sectors & {fmt_pct(dataset %>% select(articlestudy, team_function) %>% 
  distinct() %>% summarise(mean(is.na(team_function))) %>% pull())} for functions)."))

ggsave("figures/functions_and_sectors.png", p, width = 18, height = 12, units = "cm")

p

Distribution over moderators

Some of the registered moderators could not be coded meaningfully. In particular, diversity climate and psychological safety were rarely reported (< 5%) - and when they were, always as positive. Therefore, they are omitted from the reporting.

# Categorical moderators to summarise below. Note: the display order of the
# summary table is driven by the row order of var_names, not by this vector.
variables <- c("art_focus", "pub_status", "interdep", "complexity", "longevity", "tmt", "stud_sample",
               "meas_type", "design", "rater", "virtuality", "auth_diff", "language")

# Lookup from coded variable names to display labels; the row order here
# determines the order of variable groups in the summary table (via
# apply_var_and_level_names). Includes some moderators (collectivism,
# power_distance, year_merged) not in `variables` — presumably reused for
# other tables elsewhere in the file.
var_names <-  tibble::tribble(
  ~old,           ~new,
   "art_focus",    "Article focus",   
   "pub_status",   "Publication status",  
   "language",      "Language",      
   "design",       "Design",      
   "tmt",          "TMT",         
   "stud_sample",  "Student sample", 
   "meas_type",    "Diversity measure",   
   "rater",        "Performance rater",       
   "interdep",     "Interdependence",    
   "complexity",   "Complexity",  
   "virtuality",   "Virtuality",  
   "longevity",    "Longevity",
   "auth_diff",    "Authority Differentiation",   
   "collectivism",  "Collectivism",   
   "power_distance",    "Power distance",   
   "year_merged",    "Year of data collection"   
)

# Lookup from coded level values to display labels, keyed by variable; the
# row order here determines the order of levels within each variable group
# in the summary table. Rows with identical old/new labels exist purely to
# pin the level ordering.
level_names <- tibble::tribble(
  ~var,           ~level_old,                 ~level_new,                 
   "art_focus",    "focal H",                  "Focal hypothesis",                 
   "art_focus",    "auxiliary H",              "Auxiliary hypothesis",             
   "art_focus",    "descriptive",              "Descriptive",             
   "pub_status",   "Published",                "Published",               
   "pub_status",   "Masters Dissertation",          "Masters Dissertation",         
   "pub_status",   "Working paper/Preprint",   "Working Paper/Preprint",  
   "pub_status",   "Conference presentation",  "Conference Presentation", 
   "pub_status",   "PhD Dissertation",         "PhD Dissertation",        
   "interdep",     "high",                     "High",                    
   "interdep",     "medium",                   "Medium",                  
   "interdep",     "low",                      "Low",                     
   "complexity",   "high",                     "High",                    
   "complexity",   "medium",                   "Medium",                  
   "complexity",   "low",                      "Low",                     
   "tmt",          "yes",                      "Yes",                     
   "tmt",          "no",                       "No",                      
   "stud_sample",  "yes",                      "Yes",                     
   "stud_sample",  "no",                       "No",                      
   "meas_type",    "Variety",                  "Variety",                 
   "meas_type",    "Separation",               "Separation",              
   "meas_type",    "Other",                    "Other",                   
   "design",       "Experimental",             "Experimental",            
   "design",       "Observational",            "Observational",           
   "rater",        "Objective",                "Objective",               
   "rater",        "Subjective - self",        "Subjective - Self",       
   "rater",        "Subjective - supervisor",  "Subjective - Supervisor", 
   "rater",        "Subjective - external",    "Subjective - External",   
   "virtuality",   "physical",                 "Physical",                
   "virtuality",   "hybrid-members",           "Hybrid-Members",          
   "virtuality",   "virtual",                  "Virtual",                 
   "auth_diff",    "high",                     "High",                    
   "auth_diff",    "mixed",                    "Mixed",                   
   "auth_diff",    "low",                      "Low",
   "language",  "chinese",     "Chinese",    
   "language",  "dutch",       "Dutch",      
   "language",  "english",     "English",    
   "language",  "french",      "French",     
   "language",  "german",      "German",     
   "language",  "indonesian",  "Indonesian", 
   "language",  "italian",     "Italian",    
   "language",  "japanese",    "Japanese",   
   "language",  "korean",      "Korean",     
   "language",  "portuguese",  "Portuguese", 
   "language",  "spanish",     "Spanish",
   "longevity",  "hours",    "Hours",   
   "longevity",  "days",     "Days",    
   "longevity",  "weeks",    "Weeks",   
   "longevity",  "months",   "Months",  
   "longevity",  "years",    "Years",
   "longevity",  "stable",   "Stable"  
)


# Summarise one categorical moderator at the article-study level: counts and
# shares per level, split by diversity domain plus an overall "Total" column,
# pivoted wide with "n (share%)" cells. Raw total counts are re-attached in
# total_count for the bar-chart column of the gt table.
summarize_cat_variable <- function(dataset, variable) {
  # Per-domain tally of distinct article-study / level combinations
  by_domain <- dataset %>%
    select(articlestudy, all_of(variable), domain) %>%
    distinct() %>%
    rename(level = !!sym(variable)) %>%
    group_by(domain, level) %>%
    summarise(count = n(), .groups = "drop_last") %>%
    # still grouped by domain here, so shares are within-domain
    mutate(share = count / sum(count), variable = variable)

  # Overall tally across domains, labelled "Total" so it pivots into a column
  overall <- dataset %>%
    select(articlestudy, all_of(variable)) %>%
    distinct() %>%
    rename(level = !!sym(variable)) %>%
    group_by(level) %>%
    summarise(count = n(), .groups = "drop") %>%
    mutate(share = count / sum(count), domain = "Total", variable = variable)

  # Combine, format cells, and pivot domains into columns
  bind_rows(by_domain, overall) %>%
    mutate(count_share = paste0(count, " (", scales::percent(share, accuracy = .1), ")")) %>%
    select(-count, -share) %>%
    tidyr::pivot_wider(names_from = domain, values_from = count_share, values_fill = "0") %>%
    select(variable, level, everything()) %>%
    left_join(overall %>% select(variable, level, total_count = count),
              by = c("variable", "level"))
}

# Reorder the stacked summary table to match the registered variable/level
# order, then swap coded variable and level names for display labels.
# Unmatched codes keep their original text.
apply_var_and_level_names <- function(result_table, var_names, level_names) {
  # Rows ordered first by variable (var_names row order), then by level
  # (level_names row order within each variable)
  ordered <- result_table %>%
    arrange(
      match(variable, var_names$old),
      match(paste(variable, level), paste(level_names$var, level_names$level_old))
    )

  # Replace coded level values with display labels where a mapping exists
  relabelled <- ordered %>%
    left_join(level_names, by = c("variable" = "var", "level" = "level_old")) %>%
    mutate(level = ifelse(is.na(level_new), level, level_new)) %>%
    select(-level_new)

  # Replace coded variable names with display labels where a mapping exists
  relabelled %>%
    left_join(var_names, by = c("variable" = "old")) %>%
    mutate(variable = ifelse(is.na(new), variable, new)) %>%
    select(-new)
}

# One summary per moderator, stacked into a single long table
result_table <- variables %>%
  purrr::map(~ summarize_cat_variable(dataset, .x)) %>%
  bind_rows()

# Apply display names and the registered ordering
final_table <- apply_var_and_level_names(result_table, var_names, level_names)

# Render the moderator distribution as an APA-styled gt table: one row group
# per variable, one column per diversity domain, and a bar chart of total
# counts (gtExtras). Styling calls are order-sensitive.
final_table %>% 
  gt(groupname_col = "variable", rowname_col = "level") %>%
  tab_stubhead(label = "Variable") %>%
  # total_count column header suppressed; it only carries the bar chart
  cols_label(total_count = "") %>% 
  tab_spanner("Diversity domain", Demographic:`Job-related`) %>% 
  sub_missing(columns = everything(), missing_text = "(missing)") %>% 
  # gt_apa_style() is defined in the sourced helpers
  gt_apa_style() %>% tab_style(
        style = list(cell_text(weight = "bold")),
        locations = cells_row_groups()
    ) %>% 
      tab_style(
        # Indent level labels under their bolded variable group
        style = cell_text(align = "left", indent = px(15)),
        locations = cells_stub()
    ) %>% 
  tab_header("Distribution of effect sizes across moderator variables") %>% 
    gt_plt_bar(column = total_count, keep_column = FALSE, color = "grey")
Distribution of effect sizes across moderator variables
Variable Diversity domain Total
Demographic Cognitive Job-related
Article focus
Focal hypothesis 209 (45.4%) 131 (45.8%) 165 (45.6%) 351 (48.3%)
Auxiliary hypothesis 118 (25.7%) 95 (33.2%) 105 (29.0%) 200 (27.5%)
Descriptive 133 (28.9%) 60 (21.0%) 92 (25.4%) 176 (24.2%)
Publication status
Published 339 (78.7%) 204 (72.9%) 270 (78.5%) 505 (78.1%)
Masters Dissertation 33 (7.7%) 37 (13.2%) 36 (10.5%) 51 (7.9%)
Working Paper/Preprint 14 (3.2%) 0 3 (0.9%) 15 (2.3%)
Conference Presentation 12 (2.8%) 7 (2.5%) 9 (2.6%) 22 (3.4%)
PhD Dissertation 33 (7.7%) 32 (11.4%) 26 (7.6%) 54 (8.3%)
Language
Chinese 62 (14.4%) 60 (21.4%) 57 (16.6%) 81 (12.5%)
Dutch 0 0 1 (0.3%) 1 (0.2%)
English 343 (79.8%) 207 (73.9%) 274 (79.7%) 534 (82.7%)
French 1 (0.2%) 0 0 1 (0.2%)
German 5 (1.2%) 1 (0.4%) 1 (0.3%) 5 (0.8%)
Indonesian 0 1 (0.4%) 1 (0.3%) 1 (0.2%)
Italian 1 (0.2%) 0 0 1 (0.2%)
Japanese 2 (0.5%) 2 (0.7%) 2 (0.6%) 3 (0.5%)
Korean 13 (3.0%) 7 (2.5%) 5 (1.5%) 15 (2.3%)
Portuguese 2 (0.5%) 1 (0.4%) 3 (0.9%) 3 (0.5%)
Spanish 1 (0.2%) 1 (0.4%) 0 1 (0.2%)
Design
Experimental 15 (3.5%) 19 (6.8%) 2 (0.6%) 29 (4.5%)
Observational 415 (96.5%) 261 (93.2%) 342 (99.4%) 617 (95.5%)
TMT
Yes 173 (40.1%) 128 (45.7%) 179 (52.0%) 259 (40.0%)
No 258 (59.9%) 152 (54.3%) 165 (48.0%) 388 (60.0%)
Student sample
Yes 38 (8.8%) 31 (11.1%) 7 (2.0%) 55 (8.5%)
No 392 (91.2%) 249 (88.9%) 338 (98.0%) 592 (91.5%)
Diversity measure
Variety 285 (51.1%) 174 (57.6%) 239 (55.1%) 494 (53.6%)
Separation 189 (33.9%) 115 (38.1%) 173 (39.9%) 324 (35.1%)
Other 73 (13.1%) 12 (4.0%) 16 (3.7%) 92 (10.0%)
(missing) 11 (2.0%) 1 (0.3%) 6 (1.4%) 12 (1.3%)
Performance rater
Objective 240 (52.1%) 157 (52.3%) 205 (55.4%) 352 (50.6%)
Subjective - Self 87 (18.9%) 53 (17.7%) 64 (17.3%) 125 (18.0%)
Subjective - Supervisor 83 (18.0%) 58 (19.3%) 73 (19.7%) 135 (19.4%)
Subjective - External 50 (10.8%) 31 (10.3%) 26 (7.0%) 81 (11.6%)
(missing) 1 (0.2%) 1 (0.3%) 2 (0.5%) 3 (0.4%)
Interdependence
High 310 (71.8%) 208 (74.0%) 264 (76.3%) 472 (72.4%)
Medium 34 (7.9%) 22 (7.8%) 25 (7.2%) 57 (8.7%)
Low 11 (2.5%) 3 (1.1%) 10 (2.9%) 14 (2.1%)
(missing) 77 (17.8%) 48 (17.1%) 47 (13.6%) 109 (16.7%)
Complexity
High 295 (68.3%) 191 (68.0%) 281 (81.7%) 454 (70.0%)
Medium 65 (15.0%) 50 (17.8%) 29 (8.4%) 101 (15.6%)
Low 20 (4.6%) 9 (3.2%) 4 (1.2%) 25 (3.9%)
(missing) 52 (12.0%) 31 (11.0%) 30 (8.7%) 69 (10.6%)
Virtuality
Physical 79 (18.4%) 40 (14.2%) 46 (13.3%) 115 (17.7%)
Hybrid-Members 4 (0.9%) 1 (0.4%) 2 (0.6%) 6 (0.9%)
Virtual 16 (3.7%) 4 (1.4%) 11 (3.2%) 23 (3.5%)
(missing) 331 (77.0%) 236 (84.0%) 286 (82.9%) 504 (77.8%)
Longevity
Hours 18 (4.2%) 12 (4.3%) 2 (0.6%) 26 (4.0%)
Days 4 (0.9%) 5 (1.8%) 4 (1.2%) 8 (1.2%)
Weeks 21 (4.9%) 8 (2.8%) 9 (2.6%) 25 (3.8%)
Months 37 (8.6%) 31 (11.0%) 24 (7.0%) 70 (10.8%)
Years 49 (11.4%) 33 (11.7%) 51 (14.8%) 81 (12.5%)
Stable 234 (54.4%) 150 (53.2%) 211 (61.2%) 334 (51.4%)
(missing) 67 (15.6%) 43 (15.2%) 44 (12.8%) 106 (16.3%)
Authority Differentiation
High 4 (0.9%) 4 (1.4%) 3 (0.9%) 7 (1.1%)
Mixed 3 (0.7%) 2 (0.7%) 2 (0.6%) 6 (0.9%)
Low 35 (8.1%) 24 (8.6%) 11 (3.2%) 51 (7.9%)
(missing) 389 (90.3%) 250 (89.3%) 328 (95.3%) 583 (90.1%)