if (!require("pacman")) {install.packages("pacman")}
pacman::p_load(broom,
               broom.mixed,
               emmeans,
               ggtext,
               janitor,
               lme4,
               multcomp,
               multcompView,
               patchwork,
               tidyverse)

# load data ----
df_articles <- read_csv("per_article.csv") %>% 
  mutate(article_type = as.factor(str_to_sentence(article_type)))
df_references <- read_csv("per_reference.csv")
df_quotes <- read_csv("per_quote.csv")

Overview of articles and journals

Number of articles

Total

df_articles %>% 
  count()
## # A tibble: 1 × 1
##       n
##   <int>
## 1 17145

Number of articles without any references

df_articles %>% 
  count(n_all_refs_lens == 0)
## # A tibble: 2 × 2
##   `n_all_refs_lens == 0`     n
##   <lgl>                  <int>
## 1 FALSE                  15752
## 2 TRUE                    1393

Manually inspected

df_articles %>% 
  count(manually_verified,
        include_analysis)
## # A tibble: 3 × 3
##   manually_verified include_analysis     n
##   <lgl>             <lgl>            <int>
## 1 FALSE             FALSE            16459
## 2 TRUE              FALSE               41
## 3 TRUE              TRUE               645

Number of journals

Total

df_articles %>% 
  count(source_title) %>%
  count()
## # A tibble: 1 × 1
##       n
##   <int>
## 1   144

Manually inspected

df_articles %>% 
  filter(manually_verified) %>% 
  count(source_title) %>%
  count()
## # A tibble: 1 × 1
##       n
##   <int>
## 1   120

Number and proportion of retrieved articles (with at least one reference) that cited at least one automatically detected meta-analysis

df_articles %>% 
  filter(n_all_refs_lens > 0) %>% 
  group_by(detected_meta = n_meta_refs_auto > 0) %>% 
  summarise(n = n(),
            prop = n / nrow(df_articles %>% 
                              filter(n_all_refs_lens > 0)))
## # A tibble: 2 × 3
##   detected_meta     n  prop
##   <lgl>         <int> <dbl>
## 1 FALSE         12414 0.788
## 2 TRUE           3338 0.212

Proportion of meta-references among all references (automated detection)

df_articles %>% 
  filter(n_all_refs_lens != 0) %>% 
  mutate(prop_meta_refs = n_meta_refs_auto / n_all_refs_lens) %>% 
  summarise(mean_prop = mean(prop_meta_refs),
            se_prop = sd(prop_meta_refs) / sqrt(nrow(df_articles %>% 
                                                       filter(n_all_refs_lens != 0))))
## # A tibble: 1 × 2
##   mean_prop  se_prop
##       <dbl>    <dbl>
## 1   0.00618 0.000157

True meta-references

Proportion of true meta-references among all meta-references

df_articles %>% 
  filter(include_analysis) %>%
  summarise(prop = sum(n_true_meta_refs) / sum(n_meta_refs_manual))
## # A tibble: 1 × 1
##    prop
##   <dbl>
## 1 0.836

Model on absolute number of true meta-references

glmer_n_true_neg_bin <-
  df_articles %>% 
  filter(include_analysis) %>% 
  glmer.nb(data = .,
           n_true_meta_refs ~ 
             article_type - 1 +
             (1|source_title))

summary(glht(glmer_n_true_neg_bin,
             linfct = mcp(article_type = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lme4::glmer(formula = n_true_meta_refs ~ article_type - 1 + (1 | 
##     source_title), data = ., family = MASS::negative.binomial(theta = 71468.4760023709))
## 
## Linear Hypotheses:
##                                       Estimate Std. Error z value Pr(>|z|)    
## Meta-analytical - Empirical == 0       0.68456    0.09807   6.980  < 0.001 ***
## Methodological - Empirical == 0       -0.17519    0.23145  -0.757  0.96904    
## Other - Empirical == 0                 0.10385    0.21195   0.490  0.99569    
## Review - Empirical == 0                0.09367    0.09682   0.967  0.91471    
## Theoretical - Empirical == 0          -0.23794    0.23239  -1.024  0.89382    
## Methodological - Meta-analytical == 0 -0.85975    0.24532  -3.505  0.00469 ** 
## Other - Meta-analytical == 0          -0.58071    0.22789  -2.548  0.09346 .  
## Review - Meta-analytical == 0         -0.59089    0.12551  -4.708  < 0.001 ***
## Theoretical - Meta-analytical == 0    -0.92250    0.24526  -3.761  0.00198 ** 
## Other - Methodological == 0            0.27904    0.30926   0.902  0.93541    
## Review - Methodological == 0           0.26886    0.24524   1.096  0.86321    
## Theoretical - Methodological == 0     -0.06275    0.32312  -0.194  0.99995    
## Review - Other == 0                   -0.01018    0.22722  -0.045  1.00000    
## Theoretical - Other == 0              -0.34179    0.30921  -1.105  0.85887    
## Theoretical - Review == 0             -0.33161    0.24651  -1.345  0.72683    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
cld(glht(glmer_n_true_neg_bin,
         linfct = mcp(article_type = "Tukey")),
    test = adjusted("none"),
    letters = Letters)
##       Empirical Meta-analytical  Methodological           Other          Review 
##             "a"             "b"             "a"            "ab"             "a" 
##     Theoretical 
##             "a"
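
The negative binomial model uses a log link, so the article-type coefficients are on the log scale; Fig. 2a below back-transforms them with exp() before plotting. A minimal sketch of that step using broom.mixed::tidy(), already loaded above (the column names article_type and expected_n are illustrative):

# keep only the fixed effects and back-transform the log-scale estimates
# to expected counts of true meta-references per article type
tidy(glmer_n_true_neg_bin) %>% 
  filter(effect == "fixed") %>% 
  mutate(article_type = str_remove(term, "article_type"),
         expected_n = exp(estimate)) %>% 
  dplyr::select(article_type, expected_n)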

Model on the total number of references

glmer_n_all_neg_bin <-
  df_articles %>% 
  filter(include_analysis) %>% 
  glmer.nb(data = .,
           n_all_refs_wos ~ 
             article_type - 1 +
             (1|source_title))

summary(glht(glmer_n_all_neg_bin,
             linfct = mcp(article_type = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lme4::glmer(formula = n_all_refs_wos ~ article_type - 1 + (1 | 
##     source_title), data = ., family = MASS::negative.binomial(theta = 8.35756459311107))
## 
## Linear Hypotheses:
##                                        Estimate Std. Error z value Pr(>|z|)    
## Meta-analytical - Empirical == 0       0.281716   0.062223   4.528   <0.001 ***
## Methodological - Empirical == 0        0.009627   0.099975   0.096   1.0000    
## Other - Empirical == 0                 0.094336   0.103710   0.910   0.9349    
## Review - Empirical == 0                0.464147   0.048824   9.507   <0.001 ***
## Theoretical - Empirical == 0          -0.036831   0.098903  -0.372   0.9989    
## Methodological - Meta-analytical == 0 -0.272089   0.114548  -2.375   0.1460    
## Other - Meta-analytical == 0          -0.187380   0.118026  -1.588   0.5730    
## Review - Meta-analytical == 0          0.182431   0.074145   2.460   0.1194    
## Theoretical - Meta-analytical == 0    -0.318548   0.112232  -2.838   0.0446 *  
## Other - Methodological == 0            0.084709   0.141819   0.597   0.9895    
## Review - Methodological == 0           0.454520   0.108474   4.190   <0.001 ***
## Theoretical - Methodological == 0     -0.046459   0.136515  -0.340   0.9993    
## Review - Other == 0                    0.369810   0.111623   3.313   0.0103 *  
## Theoretical - Other == 0              -0.131168   0.140351  -0.935   0.9275    
## Theoretical - Review == 0             -0.500978   0.107703  -4.651   <0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
cld(glht(glmer_n_all_neg_bin,
         linfct = mcp(article_type = "Tukey")),
    test = adjusted("none"),
    letters = Letters)
##       Empirical Meta-analytical  Methodological           Other          Review 
##             "a"            "bc"            "ac"            "ac"             "b" 
##     Theoretical 
##             "a"

Model on the proportion of true meta-references among all references

glmer_rel_true <-
  df_articles %>% 
  filter(include_analysis) %>% 
  glmer(data = .,
        cbind(n_true_meta_refs,
              n_all_refs_wos - n_true_meta_refs) ~ 
          article_type - 1 +
          (1|source_title),
        family = "binomial")

summary(glht(glmer_rel_true,
             linfct = mcp(article_type = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glmer(formula = cbind(n_true_meta_refs, n_all_refs_wos - n_true_meta_refs) ~ 
##     article_type - 1 + (1 | source_title), data = ., family = "binomial")
## 
## Linear Hypotheses:
##                                       Estimate Std. Error z value Pr(>|z|)    
## Meta-analytical - Empirical == 0       0.41973    0.10276   4.085  < 0.001 ***
## Methodological - Empirical == 0       -0.20019    0.23356  -0.857  0.94800    
## Other - Empirical == 0                 0.01374    0.21651   0.063  1.00000    
## Review - Empirical == 0               -0.38418    0.10092  -3.807  0.00153 ** 
## Theoretical - Empirical == 0          -0.20263    0.23601  -0.859  0.94760    
## Methodological - Meta-analytical == 0 -0.61992    0.24790  -2.501  0.10580    
## Other - Meta-analytical == 0          -0.40599    0.23280  -1.744  0.46207    
## Review - Meta-analytical == 0         -0.80391    0.13092  -6.141  < 0.001 ***
## Theoretical - Meta-analytical == 0    -0.62236    0.24870  -2.502  0.10524    
## Other - Methodological == 0            0.21393    0.31364   0.682  0.98053    
## Review - Methodological == 0          -0.18399    0.24849  -0.740  0.97206    
## Theoretical - Methodological == 0     -0.00244    0.32515  -0.008  1.00000    
## Review - Other == 0                   -0.39792    0.23253  -1.711  0.48363    
## Theoretical - Other == 0              -0.21637    0.31451  -0.688  0.97976    
## Theoretical - Review == 0              0.18155    0.25118   0.723  0.97484    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
emm <- emmeans(glmer_rel_true, 
               ~ article_type, 
               type = "response")

emm
##  article_type      prob       SE  df asymp.LCL asymp.UCL
##  Empirical       0.0193 0.000893 Inf    0.0176    0.0211
##  Meta-analytical 0.0290 0.002870 Inf    0.0239    0.0352
##  Methodological  0.0158 0.003620 Inf    0.0101    0.0247
##  Other           0.0195 0.004110 Inf    0.0129    0.0294
##  Review          0.0132 0.001280 Inf    0.0109    0.0160
##  Theoretical     0.0158 0.003640 Inf    0.0100    0.0248
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
cld(glht(glmer_rel_true,
         linfct = mcp(article_type = "Tukey")),
    test = adjusted("none"),
    letters = Letters)
##       Empirical Meta-analytical  Methodological           Other          Review 
##             "a"             "b"           "abc"           "abc"             "c" 
##     Theoretical 
##           "abc"
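
As a cross-check of the multcomp contrasts above, the same pairwise comparisons could also be obtained directly from the emmeans object (a sketch, not part of the original output); estimates are back-transformed to odds ratios while the tests remain on the logit scale:

# Tukey-adjusted pairwise comparisons between article types,
# computed from the emmeans grid fitted above
pairs(emm, adjust = "tukey")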

Fig. 2

# fig 2a ----
fig_2a <-
  df_articles %>% 
  filter(include_analysis) %>% 
  ggplot(aes(x = article_type,
             y = n_true_meta_refs)) +
  geom_boxplot(outliers = F) +
  # geom_jitter(height = 0,
  #             width = 0.2,
  #             alpha = 0.3) +
  geom_point(data = tidy(glmer_n_true_neg_bin) %>%
               mutate(corrected_est = exp(estimate),
                      term = str_remove(term,
                                        "article_type")) %>% 
               filter(is.na(group)),
             aes(x = term,
                 y = corrected_est),
             col = "red",
             size = 2) +
  annotate("text",
           x = c(1:6),
           y = -0.05,
           label = c("a",
                     "b",
                     "a",
                     "ab",
                     "a",
                     "a")) +
  annotate("text",
           x = c(1:6),
           y = 13,
           label = paste0("N = ",
                          df_articles %>% 
                            filter(include_analysis) %>% 
                            count(article_type) %>% 
                            pull(n))) +
  labs(x = "Article type",
       y = "Number of true meta-references") +
  scale_y_continuous(limits = c(-1, 
                                14),
                     breaks = c(0, 
                                5, 
                                10)) +
  theme_classic() +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_text(margin = margin(r = 0.3, 
                                                    unit = "cm"),
                                    size = 12),
        axis.text = element_text(size = 9),
        plot.tag.position = c(0.14, 
                              0.95))

# fig 2b ----
fig_2b <-
  df_articles %>% 
  filter(include_analysis) %>% 
  ggplot(aes(x = article_type,
             y = n_all_refs_wos)) +
  geom_boxplot(outliers = F) +
  # geom_jitter(height = 0,
  #             width = 0.2,
  #             alpha = 0.3) +
  geom_point(data = tidy(glmer_n_all_neg_bin) %>%
               mutate(corrected_est = exp(estimate),
                      term = str_remove(term,
                                        "article_type")) %>% 
               filter(is.na(group)),
             aes(x = term,
                 y = corrected_est),
             col = "red",
             size = 2) +
  annotate("text",
           x = c(1:6),
           y = -1,
           label = c("a",
                     "bc",
                     "ac",
                     "ac",
                     "b",
                     "a")) +
  labs(x = "Article type",
       y = "Total number of references") +
  scale_y_continuous(limits = c(-1, 
                                270),
                     breaks = seq(0, 
                                  500, 
                                  100)) +
  theme_classic() +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_text(margin = margin(r = 0.3, unit = "cm"),
                                    size = 12),
        axis.text = element_text(size = 9),
        plot.tag.position = c(0.14, 0.95))

# fig 2c ----
fig_2c <- 
  df_articles %>% 
  filter(include_analysis) %>% 
  ggplot(aes(x = article_type,
             y = rel_true_meta_refs)) +
  geom_boxplot(outliers = F) +
  # geom_jitter(height = 0,
  #             width = 0.2,
  #             alpha = 0.3) +
  geom_point(data = as.data.frame(emmeans(glmer_rel_true,
                                          ~ article_type, 
                                          type = "response")),
             aes(x = article_type,
                 y = prob),
             col = "red",
             size = 2) +
  annotate("text",
           x = c(1:6),
           y = -0.01,
           label = c("a",
                     "b",
                     rep("abc", 2),
                     "c",
                     "abc")) +
  labs(x = "Article type",
       y = "Proportion of true meta-references\namong all references") +
  scale_y_continuous(limits = c(-0.02, 
                                0.09),
                     breaks = c(0, 
                                0.05)) +
  theme_classic() +
  theme(axis.title.x = element_text(margin = margin(t = 0.3, unit = "cm"),
                                    size = 12),
        axis.title.y = element_text(margin = margin(r = 0.3, unit = "cm"),
                                    size = 12),
        axis.text = element_text(size = 9),
        plot.tag.position = c(0.14, 
                              0.95))

(fig_2 <-
    fig_2a + 
    fig_2b + 
    fig_2c + 
    plot_layout(nrow = 3) +
    plot_annotation(tag_levels = "A"))

ggsave("fig_2.jpg",
       fig_2,
       bg = "white",
       dpi = 600,
       width = 7,
       height = 7*1.41*0.85,
       units = "in")

Fig. S2

glmer_jif_prop <-
  df_articles %>% 
  filter(include_analysis) %>% 
  mutate(jif_2022_s = scale(jif_2022)) %>% 
  glmer(data = .,
        cbind(n_true_meta_refs,
              n_all_refs_wos - n_true_meta_refs) ~ 
          jif_2022_s +
          (1|source_title) +
          (1|article_type),
        family = "binomial")

summary(glmer_jif_prop)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(n_true_meta_refs, n_all_refs_wos - n_true_meta_refs) ~  
##     jif_2022_s + (1 | source_title) + (1 | article_type)
##    Data: .
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    2021.2    2039.0   -1006.6    2013.2       641 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7957 -0.4998 -0.1458  0.4313  5.7613 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  source_title (Intercept) 0.06568  0.2563  
##  article_type (Intercept) 0.06345  0.2519  
## Number of obs: 645, groups:  source_title, 120; article_type, 6
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.97867    0.12412 -32.054   <2e-16 ***
## jif_2022_s   0.02581    0.04818   0.536    0.592    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## jif_2022_s 0.007
emmeans_df <- 
  tibble(as.data.frame(emmeans(glmer_jif_prop,
                               ~ jif_2022_s,
                               at = list(jif_2022_s = seq(min(scale(df_articles %>% 
                                                                      filter(include_analysis) %>% 
                                                                      pull(jif_2022))), 
                                                          max((scale(df_articles %>% 
                                                                       filter(include_analysis) %>% 
                                                                       pull(jif_2022)))), 
                                                          length.out = 100)),
                               type = "response"))) %>% 
  mutate(jif_2022 = (jif_2022_s * 
                       sd(df_articles %>% 
                            filter(include_analysis) %>% 
                            pull(jif_2022))) +
           mean(df_articles %>% 
                  filter(include_analysis) %>% 
                  pull(jif_2022)))
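
The back-transformation above reverses scale() by recomputing the mean and standard deviation of jif_2022; an equivalent sketch reuses the attributes that scale() stores (the function name unscale_jif is illustrative, and jif_2022 is assumed to have no missing values):

# scale() keeps its centring and scaling constants as attributes,
# so the back-transformation can reuse them directly
jif_s <- scale(df_articles %>% 
                 filter(include_analysis) %>% 
                 pull(jif_2022))

unscale_jif <- function(z) {
  z * attr(jif_s, "scaled:scale") + attr(jif_s, "scaled:center")
}

# e.g. mutate(jif_2022 = unscale_jif(jif_2022_s)) instead of the manual mean()/sd() step above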

## fig. s2 ----

(fig_s2 <- 
   df_articles %>% 
   filter(include_analysis) %>% 
   # mutate(jif_2022_s = scale(jif_2022)) %>% 
   ggplot(aes(x = jif_2022,
              y = rel_true_meta_refs)) +
   geom_jitter(height = 0,
               width = 0.2,
               alpha = 0.3) +
   geom_smooth(data = emmeans_df,
               aes(x = jif_2022,
                   y = prob),
               col = "red") +
   geom_smooth(data = emmeans_df,
               aes(x = jif_2022,
                   y = asymp.LCL),
               linetype = "dashed",
               col = "red",
               alpha = 0.3) +
   geom_smooth(data = emmeans_df,
               aes(x = jif_2022,
                   y = asymp.UCL),
               linetype = "dashed",
               col = "red",
               alpha = 0.3) +
   labs(x = "Clarivate's 2022 journal impact factor",
        y = "Proportion of true meta references\namong all references",
        col = "Article type") +
   theme_classic() +
   theme(axis.title.x = element_text(margin = margin(t = 0.3, unit = "cm"),
                                     size = 12),
         axis.title.y = element_text(margin = margin(r = 0.3, unit = "cm"),
                                     size = 12),
         axis.text = element_text(size = 9)))

# ggsave("fig_s2.jpg",
#        fig_s2,
#        bg = "white",
#        dpi = 600,
#        width = 7,
#        height = 7*1.41*0.85*0.5,
#        units = "in")

Meta-references per manuscript section

x <-
  df_references %>% 
  distinct(article_id,
           meta_ref_id,
           meta_ref_section_adj,
           .keep_all = T) %>%
  left_join(df_articles %>% 
              dplyr::select(article_id,
                            article_type)) %>%
  filter(is_meta_ref_meta_analysis,
         !is.na(meta_ref_section_adj),
         meta_ref_section_adj != "other",
         article_type != "invalid") %>%
  count(article_type,
        meta_ref_section_adj) %>% 
  pivot_wider(names_from = meta_ref_section_adj,
              values_from = n) %>% 
  mutate(methods = replace_na(methods, 0),
         total = introduction + methods + `results/discussion/conclusion`,
         prop_intro = introduction / total,
         prop_methods = methods / total,
         prop_res = `results/discussion/conclusion` / total)
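
A quick sanity check (sketch, not part of the original script): the three section proportions should sum to one within each article type.

# prop_intro + prop_methods + prop_res should equal 1 in every row
x %>% 
  mutate(prop_sum = prop_intro + prop_methods + prop_res) %>% 
  dplyr::select(article_type, prop_sum)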

Model for introduction section

glm_introduction <-
  x %>% 
  glm(data = .,
      cbind(introduction,
            total - introduction) ~ 
        article_type - 1,
      family = "binomial")

summary(glht(glm_introduction,
             linfct = mcp(article_type = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = cbind(introduction, total - introduction) ~ article_type - 
##     1, family = "binomial", data = .)
## 
## Linear Hypotheses:
##                                       Estimate Std. Error z value Pr(>|z|)    
## Meta-analytical - Empirical == 0      -0.80657    0.16750  -4.816   <0.001 ***
## Methodological - Empirical == 0       -0.20044    0.47630  -0.421   0.9979    
## Other - Empirical == 0                -0.45175    0.50853  -0.888   0.9386    
## Review - Empirical == 0               -0.03054    0.27005  -0.113   1.0000    
## Theoretical - Empirical == 0          -0.11343    0.42294  -0.268   0.9998    
## Methodological - Meta-analytical == 0  0.60614    0.49562   1.223   0.7972    
## Other - Meta-analytical == 0           0.35482    0.52667   0.674   0.9812    
## Review - Meta-analytical == 0          0.77603    0.30283   2.563   0.0896 .  
## Theoretical - Meta-analytical == 0     0.69315    0.44459   1.559   0.5832    
## Other - Methodological == 0           -0.25131    0.69007  -0.364   0.9989    
## Review - Methodological == 0           0.16990    0.53899   0.315   0.9995    
## Theoretical - Methodological == 0      0.08701    0.62965   0.138   1.0000    
## Review - Other == 0                    0.42121    0.56767   0.742   0.9712    
## Theoretical - Other == 0               0.33833    0.65438   0.517   0.9944    
## Theoretical - Review == 0             -0.08289    0.49247  -0.168   1.0000    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
cld(glht(glm_introduction,
         linfct = mcp(article_type = "Tukey")),
    test = adjusted("none"),
    letters = Letters)
##       Empirical Meta-analytical  Methodological           Other          Review 
##             "a"             "b"            "ab"            "ab"            "ab" 
##     Theoretical 
##            "ab"

Model for methods section

glm_methods <-
  x %>% 
  glm(data = .,
      cbind(methods,
            total - methods) ~ 
        article_type - 1,
      family = "binomial")

summary(glht(glm_methods,
             linfct = mcp(article_type = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = cbind(methods, total - methods) ~ article_type - 
##     1, family = "binomial", data = .)
## 
## Linear Hypotheses:
##                                         Estimate Std. Error z value Pr(>|z|)
## Meta-analytical - Empirical == 0          1.6534     0.2186   7.565   <1e-04
## Methodological - Empirical == 0           1.4639     0.5841   2.506   0.0861
## Other - Empirical == 0                    0.7707     0.7689   1.002   0.8855
## Review - Empirical == 0                  -0.2101     0.6090  -0.345   0.9990
## Theoretical - Empirical == 0            -23.1669 52786.2621   0.000   1.0000
## Methodological - Meta-analytical == 0    -0.1895     0.5911  -0.321   0.9993
## Other - Meta-analytical == 0             -0.8826     0.7742  -1.140   0.8185
## Review - Meta-analytical == 0            -1.8635     0.6158  -3.026   0.0197
## Theoretical - Meta-analytical == 0      -24.8202 52786.2621   0.000   1.0000
## Other - Methodological == 0              -0.6931     0.9449  -0.734   0.9676
## Review - Methodological == 0             -1.6740     0.8201  -2.041   0.2477
## Theoretical - Methodological == 0       -24.6307 52786.2621   0.000   1.0000
## Review - Other == 0                      -0.9808     0.9605  -1.021   0.8774
## Theoretical - Other == 0                -23.9376 52786.2621   0.000   1.0000
## Theoretical - Review == 0               -22.9568 52786.2621   0.000   1.0000
##                                          
## Meta-analytical - Empirical == 0      ***
## Methodological - Empirical == 0       .  
## Other - Empirical == 0                   
## Review - Empirical == 0                  
## Theoretical - Empirical == 0             
## Methodological - Meta-analytical == 0    
## Other - Meta-analytical == 0             
## Review - Meta-analytical == 0         *  
## Theoretical - Meta-analytical == 0       
## Other - Methodological == 0              
## Review - Methodological == 0             
## Theoretical - Methodological == 0        
## Review - Other == 0                      
## Theoretical - Other == 0                 
## Theoretical - Review == 0                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
cld(glht(glm_methods,
         linfct = mcp(article_type = "Tukey")),
    test = adjusted("none"),
    letters = Letters)
##       Empirical Meta-analytical  Methodological           Other          Review 
##             "a"             "b"            "ab"            "ab"             "a" 
##     Theoretical 
##            "ab"

Model for results/discussion/conclusion section

glm_discussion <-
  x %>% 
  glm(data = .,
      cbind(`results/discussion/conclusion`,
            total - `results/discussion/conclusion`) ~ 
        article_type - 1,
      family = "binomial")

cld(glht(glm_discussion,
         linfct = mcp(article_type = "Tukey")),
    test = adjusted("none"),
    letters = Letters)
##       Empirical Meta-analytical  Methodological           Other          Review 
##             "a"             "a"             "a"             "a"             "a" 
##     Theoretical 
##             "a"
summary(glht(glm_discussion,
             linfct = mcp(article_type = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = cbind(`results/discussion/conclusion`, total - 
##     `results/discussion/conclusion`) ~ article_type - 1, family = "binomial", 
##     data = .)
## 
## Linear Hypotheses:
##                                        Estimate Std. Error z value Pr(>|z|)
## Meta-analytical - Empirical == 0       0.009737   0.165243   0.059    1.000
## Methodological - Empirical == 0       -0.500036   0.530809  -0.942    0.922
## Other - Empirical == 0                 0.204161   0.508727   0.401    0.998
## Review - Empirical == 0                0.078181   0.273994   0.285    1.000
## Theoretical - Empirical == 0           0.368464   0.423175   0.871    0.943
## Methodological - Meta-analytical == 0 -0.509772   0.547169  -0.932    0.925
## Other - Meta-analytical == 0           0.194425   0.525774   0.370    0.999
## Review - Meta-analytical == 0          0.068445   0.304481   0.225    1.000
## Theoretical - Meta-analytical == 0     0.358728   0.443523   0.809    0.958
## Other - Methodological == 0            0.704197   0.728623   0.966    0.914
## Review - Methodological == 0           0.578217   0.589204   0.981    0.908
## Theoretical - Methodological == 0      0.868500   0.671689   1.293    0.756
## Review - Other == 0                   -0.125980   0.569391  -0.221    1.000
## Theoretical - Other == 0               0.164303   0.654378   0.251    1.000
## Theoretical - Review == 0              0.290283   0.494450   0.587    0.990
## (Adjusted p values reported -- single-step method)

Fig. 3

### fig 3a ----
fig_3a <- 
  df_references %>% 
  distinct(article_id,
           meta_ref_id,
           meta_ref_section_adj,
           .keep_all = T) %>%
  left_join(df_articles %>% 
              dplyr::select(article_id,
                            article_type)) %>%
  filter(is_meta_ref_meta_analysis,
         !is.na(meta_ref_section_adj),
         meta_ref_section_adj != "other",
         article_type != "invalid") %>%
  count(article_type,
        meta_ref_section_adj) %>% 
  group_by(article_type) %>%
  mutate(prop = n / sum(n)) %>% 
  ggplot(aes(x = article_type,
             y = prop,
             group = meta_ref_section_adj,
             fill = meta_ref_section_adj,
             label = n)) +
  geom_col() +
  geom_label(hjust = 0.5,
             position = position_stack(vjust = 0.5),
             col = "black",
             fill = "white",
             label.r = unit(0.4, "lines"),
             label.padding = unit(0.15, 
                                  "lines"),
             size = 3.5) +
  scale_fill_manual(values = c("grey90", 
                               "grey60", 
                               "grey30")) +
  scale_y_continuous(breaks = seq(0, 
                                  1, 
                                  by = 0.2),
                     limits = c(0, 1)) +
  labs(pattern = "Section",
       pattern_angle = "Section",
       pattern_density = "Section",
       pattern_spacing = "Section",
       fill = "Section",
       x = "Article type",
       y = "Proportion of articles") +
  theme_classic() +
  theme(axis.title.x = element_text(margin = margin(t = 0.3, 
                                                    unit = "cm"),
                                    size = 12),
        axis.title.y = element_text(margin = margin(r = 0.3, 
                                                    unit = "cm"),
                                    size = 12),
        axis.text = element_text(size = 9),
        legend.title = element_text(size = 12),
        legend.text = element_text(size = 9),
        legend.position = "top",
        plot.tag.position = c(0.13, 
                              0.93))


### fig 3b ----
res_quotes <- 
  df_quotes %>%
  count(reports_any_result,
        reports_quantitative_result,
        reports_limitation) %>% 
  mutate(prop = n/nrow(df_quotes))
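
Because the three flags are logical, the proportions plotted in Fig. 3b can also be obtained directly as column means of df_quotes (a sketch, not part of the original script; assumes the flags contain no missing values):

# proportion of quotations reporting any result, a quantitative result,
# or a limitation, computed as the mean of each logical column
df_quotes %>% 
  summarise(across(c(reports_any_result,
                     reports_quantitative_result,
                     reports_limitation),
                   mean))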

fig_3b <-
  res_quotes %>%
  filter(reports_any_result) %>% 
  summarise(n = sum(n),
            value = sum(prop)) %>% 
  bind_rows(res_quotes %>%
              filter(reports_quantitative_result) %>% 
              summarise(n = sum(n),
                        value = sum(prop))) %>% 
  bind_rows(res_quotes %>%
              filter(reports_limitation) %>% 
              summarise(n = sum(n),
                        value = sum(prop))) %>% 
  mutate(x = factor(c("Any result",
                      "Quantitative result",
                      "Limitation"),
                    levels = c("Any result",
                               "Quantitative result",
                               "Limitation"))) %>% 
  ggplot(aes(x = x,
             y = value,
             label = n)) +
  geom_col(fill = "black") +
  geom_label(hjust = 0.5,
             position = position_stack(vjust = 0.5),
             col = "black",
             fill = "white",
             label.r = unit(0.4,
                            "lines"),
             label.padding = unit(0.15, 
                                  "lines"),
             size = 3.5) +
  scale_y_continuous(breaks = seq(0, 
                                  1, 
                                  by = 0.2),
                     limits = c(0, 1)) +
  labs(x = "Content type",
       y = "Proportion of quotations") +
  theme_classic() +
  theme(axis.title.x = element_text(margin = margin(t = 0.3, unit = "cm"),
                                    size = 12),
        axis.title.y = element_text(margin = margin(r = 0.3, unit = "cm"),
                                    size = 12),
        axis.text = element_text(size = 9),
        plot.tag.position = c(0.13, 0.95))

### joined figures ----
(fig_3 <-
   fig_3a +
   fig_3b +
   plot_layout(nrow = 2) +
   plot_annotation(tag_levels = "A"))

# ggsave("fig_3.jpg",
#        fig_3,
#        bg = "white",
#        dpi = 600,
#        width = 7,
#        height = 7*1.41*0.6,
#        units = "in")

Quote content

Proportion of quotes on meta-analyses’ results

df_quotes %>% 
  count(reports_any_result,
        reports_quantitative_result) %>% 
  mutate(prop = n/nrow(df_quotes))
## # A tibble: 3 × 4
##   reports_any_result reports_quantitative_result     n   prop
##   <lgl>              <lgl>                       <int>  <dbl>
## 1 FALSE              FALSE                         377 0.229 
## 2 TRUE               FALSE                        1196 0.727 
## 3 TRUE               TRUE                           73 0.0443

Proportion of quotes on meta-analyses’ limitations

## proportion of quotes about limitation ----
df_quotes %>% 
  count(reports_limitation) %>% 
  mutate(prop = n/nrow(df_quotes))
## # A tibble: 2 × 3
##   reports_limitation     n   prop
##   <lgl>              <int>  <dbl>
## 1 FALSE               1601 0.973 
## 2 TRUE                  45 0.0273

Efficiency of detecting meta-references

df_articles %>%
  filter(manually_verified) %>%
  mutate(auto_meta = str_detect(title,
                                "meta-an|metaan|meta-reg|metareg")) %>% 
  count(auto_meta,
        article_type)
## # A tibble: 9 × 3
##   auto_meta article_type        n
##   <lgl>     <fct>           <int>
## 1 FALSE     Empirical         499
## 2 FALSE     Invalid             7
## 3 FALSE     Meta-analytical    25
## 4 FALSE     Methodological     16
## 5 FALSE     Other              17
## 6 FALSE     Review             82
## 7 FALSE     Theoretical        17
## 8 TRUE      Meta-analytical    22
## 9 TRUE      Methodological      1
tp <- 22
fn <- 25
fp <- 1
tn <- 638
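
The four cells above are read off the count table; as a sketch (the names df_conf and is_meta are illustrative, and article titles are assumed to have no missing values), they could equally be derived programmatically:

# confusion matrix of the title-based detection (prediction) against the
# manual classification (ground truth: article_type == "Meta-analytical")
df_conf <- 
  df_articles %>% 
  filter(manually_verified) %>% 
  mutate(auto_meta = str_detect(title,
                                "meta-an|metaan|meta-reg|metareg"),
         is_meta = article_type == "Meta-analytical")

tp <- sum(df_conf$auto_meta & df_conf$is_meta)    # detected and truly meta-analytical
fp <- sum(df_conf$auto_meta & !df_conf$is_meta)   # detected but not meta-analytical
fn <- sum(!df_conf$auto_meta & df_conf$is_meta)   # missed meta-analytical articles
tn <- sum(!df_conf$auto_meta & !df_conf$is_meta)  # correctly left undetected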

Precision

#### precision ----
tp/(tp+fp)
## [1] 0.9565217

Sensitivity

#### sensitivity ----
tp/(tp+fn)
## [1] 0.4680851

Accuracy

#### accuracy ----
(tp+tn)/(tp+fp+tn+fn)
## [1] 0.9620991