if (!require("pacman")) {install.packages("pacman")}
pacman::p_load(broom,
               broom.mixed,
               emmeans,
               ggtext,
               janitor,
               lme4,
               multcomp,
               multcompView,
               patchwork,
               tidyverse)

# load data ----
df_articles <- read_csv("per_article.csv") %>% 
  mutate(article_type = as.factor(str_to_sentence(article_type)))
df_references <- read_csv("per_reference.csv")
df_quotes <- read_csv("per_quote.csv")

Overview of articles and journals

Number of articles

Total

df_articles %>% 
  count()
## # A tibble: 1 × 1
##       n
##   <int>
## 1 17145

Number of articles without any references

df_articles %>% 
  count(n_all_refs_lens == 0)
## # A tibble: 2 × 2
##   `n_all_refs_lens == 0`     n
##   <lgl>                  <int>
## 1 FALSE                  15752
## 2 TRUE                    1393

Manually inspected

df_articles %>% 
  count(manually_verified,
        include_analysis)
## # A tibble: 3 × 3
##   manually_verified include_analysis     n
##   <lgl>             <lgl>            <int>
## 1 FALSE             FALSE            16459
## 2 TRUE              FALSE               41
## 3 TRUE              TRUE               645

Number of journals

Total

df_articles %>% 
  count(source_title) %>%
  count()
## # A tibble: 1 × 1
##       n
##   <int>
## 1   144

Manually inspected

df_articles %>% 
  filter(manually_verified) %>% 
  count(source_title) %>%
  count()
## # A tibble: 1 × 1
##       n
##   <int>
## 1   120

Number and proportion of retrieved articles (with at least one reference) that cited at least one automatically detected meta-analysis

df_articles %>% 
  filter(n_all_refs_lens > 0) %>% 
  group_by(detected_meta = n_meta_refs_auto > 0) %>% 
  summarise(n = n(),
            prop = n / nrow(df_articles %>% 
                              filter(n_all_refs_lens > 0)))
## # A tibble: 2 × 3
##   detected_meta     n  prop
##   <lgl>         <int> <dbl>
## 1 FALSE         12414 0.788
## 2 TRUE           3338 0.212

Proportion of meta-references among all references (automated detection)

df_articles %>% 
  filter(n_all_refs_lens != 0) %>% 
  mutate(prop_meta_refs = n_meta_refs_auto / n_all_refs_lens) %>% 
  summarise(mean_prop = mean(prop_meta_refs),
            se_prop = sd(prop_meta_refs) / sqrt(nrow(df_articles %>% 
                                                       filter(n_all_refs_lens != 0))))
## # A tibble: 1 × 2
##   mean_prop  se_prop
##       <dbl>    <dbl>
## 1   0.00618 0.000157

True meta-references

Proportion of true meta-references among all meta-references

df_articles %>% 
  filter(include_analysis) %>%
  summarise(prop = sum(n_true_meta_refs) / sum(n_meta_refs_manual))
## # A tibble: 1 × 1
##    prop
##   <dbl>
## 1 0.836

Model on absolute number of true meta-references

glmer_n_true_neg_bin <-
  df_articles %>% 
  filter(include_analysis) %>% 
  glmer.nb(data = .,
           n_true_meta_refs ~ 
             article_type - 1 +
             (1|source_title))

summary(glht(glmer_n_true_neg_bin,
             linfct = mcp(article_type = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lme4::glmer(formula = n_true_meta_refs ~ article_type - 1 + (1 | 
##     source_title), data = ., family = MASS::negative.binomial(theta = 71468.4760023709))
## 
## Linear Hypotheses:
##                                       Estimate Std. Error z value Pr(>|z|)    
## Meta-analytical - Empirical == 0       0.68456    0.09807   6.980  < 0.001 ***
## Methodological - Empirical == 0       -0.17519    0.23145  -0.757  0.96904    
## Other - Empirical == 0                 0.10385    0.21195   0.490  0.99569    
## Review - Empirical == 0                0.09367    0.09682   0.967  0.91471    
## Theoretical - Empirical == 0          -0.23794    0.23239  -1.024  0.89382    
## Methodological - Meta-analytical == 0 -0.85975    0.24532  -3.505  0.00469 ** 
## Other - Meta-analytical == 0          -0.58071    0.22789  -2.548  0.09346 .  
## Review - Meta-analytical == 0         -0.59089    0.12551  -4.708  < 0.001 ***
## Theoretical - Meta-analytical == 0    -0.92250    0.24526  -3.761  0.00198 ** 
## Other - Methodological == 0            0.27904    0.30926   0.902  0.93541    
## Review - Methodological == 0           0.26886    0.24524   1.096  0.86321    
## Theoretical - Methodological == 0     -0.06275    0.32312  -0.194  0.99995    
## Review - Other == 0                   -0.01018    0.22722  -0.045  1.00000    
## Theoretical - Other == 0              -0.34179    0.30921  -1.105  0.85887    
## Theoretical - Review == 0             -0.33161    0.24651  -1.345  0.72683    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
cld(glht(glmer_n_true_neg_bin,
         linfct = mcp(article_type = "Tukey")),
    test = adjusted("none"),
    letters = Letters)
##       Empirical Meta-analytical  Methodological           Other          Review 
##             "a"             "b"             "a"            "ab"             "a" 
##     Theoretical 
##             "a"
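
The negative binomial model uses a log link, so the article-type coefficients are on the log scale; Fig. 2a below back-transforms them with exp() before plotting. A minimal sketch of that step using broom.mixed::tidy(), already loaded above (the column names article_type and expected_n are illustrative):

# keep only the fixed effects and back-transform the log-scale estimates
# to expected counts of true meta-references per article type
tidy(glmer_n_true_neg_bin) %>% 
  filter(effect == "fixed") %>% 
  mutate(article_type = str_remove(term, "article_type"),
         expected_n = exp(estimate)) %>% 
  dplyr::select(article_type, expected_n)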

Model on the total number of references

glmer_n_all_neg_bin <-
  df_articles %>% 
  filter(include_analysis) %>% 
  glmer.nb(data = .,
           n_all_refs_wos ~ 
             article_type - 1 +
             (1|source_title))

summary(glht(glmer_n_all_neg_bin,
             linfct = mcp(article_type = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lme4::glmer(formula = n_all_refs_wos ~ article_type - 1 + (1 | 
##     source_title), data = ., family = MASS::negative.binomial(theta = 8.35756459311107))
## 
## Linear Hypotheses:
##                                        Estimate Std. Error z value Pr(>|z|)    
## Meta-analytical - Empirical == 0       0.281716   0.062223   4.528   <0.001 ***
## Methodological - Empirical == 0        0.009627   0.099975   0.096   1.0000    
## Other - Empirical == 0                 0.094336   0.103710   0.910   0.9349    
## Review - Empirical == 0                0.464147   0.048824   9.507   <0.001 ***
## Theoretical - Empirical == 0          -0.036831   0.098903  -0.372   0.9989    
## Methodological - Meta-analytical == 0 -0.272089   0.114548  -2.375   0.1460    
## Other - Meta-analytical == 0          -0.187380   0.118026  -1.588   0.5730    
## Review - Meta-analytical == 0          0.182431   0.074145   2.460   0.1194    
## Theoretical - Meta-analytical == 0    -0.318548   0.112232  -2.838   0.0446 *  
## Other - Methodological == 0            0.084709   0.141819   0.597   0.9895    
## Review - Methodological == 0           0.454520   0.108474   4.190   <0.001 ***
## Theoretical - Methodological == 0     -0.046459   0.136515  -0.340   0.9993    
## Review - Other == 0                    0.369810   0.111623   3.313   0.0103 *  
## Theoretical - Other == 0              -0.131168   0.140351  -0.935   0.9275    
## Theoretical - Review == 0             -0.500978   0.107703  -4.651   <0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
cld(glht(glmer_n_all_neg_bin,
         linfct = mcp(article_type = "Tukey")),
    test = adjusted("none"),
    letters = Letters)
##       Empirical Meta-analytical  Methodological           Other          Review 
##             "a"            "bc"            "ac"            "ac"             "b" 
##     Theoretical 
##             "a"

Model on the proportion of true meta-references among all references

glmer_rel_true <-
  df_articles %>% 
  filter(include_analysis) %>% 
  glmer(data = .,
        cbind(n_true_meta_refs,
              n_all_refs_wos - n_true_meta_refs) ~ 
          article_type - 1 +
          (1|source_title),
        family = "binomial")

summary(glht(glmer_rel_true,
             linfct = mcp(article_type = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glmer(formula = cbind(n_true_meta_refs, n_all_refs_wos - n_true_meta_refs) ~ 
##     article_type - 1 + (1 | source_title), data = ., family = "binomial")
## 
## Linear Hypotheses:
##                                       Estimate Std. Error z value Pr(>|z|)    
## Meta-analytical - Empirical == 0       0.41973    0.10276   4.085  < 0.001 ***
## Methodological - Empirical == 0       -0.20019    0.23356  -0.857  0.94800    
## Other - Empirical == 0                 0.01374    0.21651   0.063  1.00000    
## Review - Empirical == 0               -0.38418    0.10092  -3.807  0.00153 ** 
## Theoretical - Empirical == 0          -0.20263    0.23601  -0.859  0.94760    
## Methodological - Meta-analytical == 0 -0.61992    0.24790  -2.501  0.10580    
## Other - Meta-analytical == 0          -0.40599    0.23280  -1.744  0.46207    
## Review - Meta-analytical == 0         -0.80391    0.13092  -6.141  < 0.001 ***
## Theoretical - Meta-analytical == 0    -0.62236    0.24870  -2.502  0.10524    
## Other - Methodological == 0            0.21393    0.31364   0.682  0.98053    
## Review - Methodological == 0          -0.18399    0.24849  -0.740  0.97206    
## Theoretical - Methodological == 0     -0.00244    0.32515  -0.008  1.00000    
## Review - Other == 0                   -0.39792    0.23253  -1.711  0.48363    
## Theoretical - Other == 0              -0.21637    0.31451  -0.688  0.97976    
## Theoretical - Review == 0              0.18155    0.25118   0.723  0.97484    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
emm <- emmeans(glmer_rel_true, 
               ~ article_type, 
               type = "response")

emm
##  article_type      prob       SE  df asymp.LCL asymp.UCL
##  Empirical       0.0193 0.000893 Inf    0.0176    0.0211
##  Meta-analytical 0.0290 0.002870 Inf    0.0239    0.0352
##  Methodological  0.0158 0.003620 Inf    0.0101    0.0247
##  Other           0.0195 0.004110 Inf    0.0129    0.0294
##  Review          0.0132 0.001280 Inf    0.0109    0.0160
##  Theoretical     0.0158 0.003640 Inf    0.0100    0.0248
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
cld(glht(glmer_rel_true,
         linfct = mcp(article_type = "Tukey")),
    test = adjusted("none"),
    letters = Letters)
##       Empirical Meta-analytical  Methodological           Other          Review 
##             "a"             "b"           "abc"           "abc"             "c" 
##     Theoretical 
##           "abc"
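
As a cross-check of the multcomp contrasts above, the same pairwise comparisons could also be obtained directly from the emmeans object (a sketch, not part of the original output); estimates are back-transformed to odds ratios while the tests remain on the logit scale:

# Tukey-adjusted pairwise comparisons between article types,
# computed from the emmeans grid fitted above
pairs(emm, adjust = "tukey")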

Fig. 2

# fig 2a ----
fig_2a <-
  df_articles %>% 
  filter(include_analysis) %>% 
  ggplot(aes(x = article_type,
             y = n_true_meta_refs)) +
  geom_boxplot(outliers = F) +
  # geom_jitter(height = 0,
  #             width = 0.2,
  #             alpha = 0.3) +
  geom_point(data = tidy(glmer_n_true_neg_bin) %>%
               mutate(corrected_est = exp(estimate),
                      term = str_remove(term,
                                        "article_type")) %>% 
               filter(is.na(group)),
             aes(x = term,
                 y = corrected_est),
             col = "red",
             size = 2) +
  annotate("text",
           x = c(1:6),
           y = -0.05,
           label = c("a",
                     "b",
                     "a",
                     "ab",
                     "a",
                     "a")) +
  annotate("text",
           x = c(1:6),
           y = 13,
           label = paste0("N = ",
                          df_articles %>% 
                            filter(include_analysis) %>% 
                            count(article_type) %>% 
                            pull(n))) +
  labs(x = "Article type",
       y = "Number of true meta-references") +
  scale_y_continuous(limits = c(-1, 
                                14),
                     breaks = c(0, 
                                5, 
                                10)) +
  theme_classic() +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_text(margin = margin(r = 0.3, 
                                                    unit = "cm"),
                                    size = 12),
        axis.text = element_text(size = 9),
        plot.tag.position = c(0.14, 
                              0.95))

# fig 2b ----
fig_2b <-
  df_articles %>% 
  filter(include_analysis) %>% 
  ggplot(aes(x = article_type,
             y = n_all_refs_wos)) +
  geom_boxplot(outliers = F) +
  # geom_jitter(height = 0,
  #             width = 0.2,
  #             alpha = 0.3) +
  geom_point(data = tidy(glmer_n_all_neg_bin) %>%
               mutate(corrected_est = exp(estimate),
                      term = str_remove(term,
                                        "article_type")) %>% 
               filter(is.na(group)),
             aes(x = term,
                 y = corrected_est),
             col = "red",
             size = 2) +
  annotate("text",
           x = c(1:6),
           y = -1,
           label = c("a",
                     "bc",
                     "ac",
                     "ac",
                     "b",
                     "a")) +
  labs(x = "Article type",
       y = "Total number of references") +
  scale_y_continuous(limits = c(-1, 
                                270),
                     breaks = seq(0, 
                                  500, 
                                  100)) +
  theme_classic() +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_text(margin = margin(r = 0.3, unit = "cm"),
                                    size = 12),
        axis.text = element_text(size = 9),
        plot.tag.position = c(0.14, 0.95))

# fig 2c ----
fig_2c <- 
  df_articles %>% 
  filter(include_analysis) %>% 
  ggplot(aes(x = article_type,
             y = rel_true_meta_refs)) +
  geom_boxplot(outliers = F) +
  # geom_jitter(height = 0,
  #             width = 0.2,
  #             alpha = 0.3) +
  geom_point(data = as.data.frame(emmeans(glmer_rel_true,
                                          ~ article_type, 
                                          type = "response")),
             aes(x = article_type,
                 y = prob),
             col = "red",
             size = 2) +
  annotate("text",
           x = c(1:6),
           y = -0.01,
           label = c("a",
                     "b",
                     rep("abc", 2),
                     "c",
                     "abc")) +
  labs(x = "Article type",
       y = "Proportion of true meta-references\namong all references") +
  scale_y_continuous(limits = c(-0.02, 
                                0.09),
                     breaks = c(0, 
                                0.05)) +
  theme_classic() +
  theme(axis.title.x = element_text(margin = margin(t = 0.3, unit = "cm"),
                                    size = 12),
        axis.title.y = element_text(margin = margin(r = 0.3, unit = "cm"),
                                    size = 12),
        axis.text = element_text(size = 9),
        plot.tag.position = c(0.14, 
                              0.95))

(fig_2 <-
    fig_2a + 
    fig_2b + 
    fig_2c + 
    plot_layout(nrow = 3) +
    plot_annotation(tag_levels = "A"))

ggsave("fig_2.jpg",
       fig_2,
       bg = "white",
       dpi = 600,
       width = 7,
       height = 7*1.41*0.85,
       units = "in")

Fig. S2

glmer_jif_prop <-
  df_articles %>% 
  filter(include_analysis) %>% 
  mutate(jif_2022_s = scale(jif_2022)) %>% 
  glmer(data = .,
        cbind(n_true_meta_refs,
              n_all_refs_wos - n_true_meta_refs) ~ 
          jif_2022_s +
          (1|source_title) +
          (1|article_type),
        family = "binomial")

summary(glmer_jif_prop)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(n_true_meta_refs, n_all_refs_wos - n_true_meta_refs) ~  
##     jif_2022_s + (1 | source_title) + (1 | article_type)
##    Data: .
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    2021.2    2039.0   -1006.6    2013.2       641 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7957 -0.4998 -0.1458  0.4313  5.7613 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  source_title (Intercept) 0.06568  0.2563  
##  article_type (Intercept) 0.06345  0.2519  
## Number of obs: 645, groups:  source_title, 120; article_type, 6
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.97867    0.12412 -32.054   <2e-16 ***
## jif_2022_s   0.02581    0.04818   0.536    0.592    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## jif_2022_s 0.007
emmeans_df <- 
  tibble(as.data.frame(emmeans(glmer_jif_prop,
                               ~ jif_2022_s,
                               at = list(jif_2022_s = seq(min(scale(df_articles %>% 
                                                                      filter(include_analysis) %>% 
                                                                      pull(jif_2022))), 
                                                          max((scale(df_articles %>% 
                                                                       filter(include_analysis) %>% 
                                                                       pull(jif_2022)))), 
                                                          length.out = 100)),
                               type = "response"))) %>% 
  mutate(jif_2022 = (jif_2022_s * 
                       sd(df_articles %>% 
                            filter(include_analysis) %>% 
                            pull(jif_2022))) +
           mean(df_articles %>% 
                  filter(include_analysis) %>% 
                  pull(jif_2022)))
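
The back-transformation above reverses scale() by recomputing the mean and standard deviation of jif_2022; an equivalent sketch reuses the attributes that scale() stores (the function name unscale_jif is illustrative, and jif_2022 is assumed to have no missing values):

# scale() keeps its centring and scaling constants as attributes,
# so the back-transformation can reuse them directly
jif_s <- scale(df_articles %>% 
                 filter(include_analysis) %>% 
                 pull(jif_2022))

unscale_jif <- function(z) {
  z * attr(jif_s, "scaled:scale") + attr(jif_s, "scaled:center")
}

# e.g. mutate(jif_2022 = unscale_jif(jif_2022_s)) instead of the manual mean()/sd() step above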

## fig. s2 ----

(fig_s2 <- 
   df_articles %>% 
   filter(include_analysis) %>% 
   # mutate(jif_2022_s = scale(jif_2022)) %>% 
   ggplot(aes(x = jif_2022,
              y = rel_true_meta_refs)) +
   geom_jitter(height = 0,
               width = 0.2,
               alpha = 0.3) +
   geom_smooth(data = emmeans_df,
               aes(x = jif_2022,
                   y = prob),
               col = "red") +
   geom_smooth(data = emmeans_df,
               aes(x = jif_2022,
                   y = asymp.LCL),
               linetype = "dashed",
               col = "red",
               alpha = 0.3) +
   geom_smooth(data = emmeans_df,
               aes(x = jif_2022,
                   y = asymp.UCL),
               linetype = "dashed",
               col = "red",
               alpha = 0.3) +
   labs(x = "Clarivate's 2022 journal impact factor",
        y = "Proportion of true meta references\namong all references",
        col = "Article type") +
   theme_classic() +
   theme(axis.title.x = element_text(margin = margin(t = 0.3, unit = "cm"),
                                     size = 12),
         axis.title.y = element_text(margin = margin(r = 0.3, unit = "cm"),
                                     size = 12),
         axis.text = element_text(size = 9)))

# ggsave("fig_s2.jpg",
#        fig_s2,
#        bg = "white",
#        dpi = 600,
#        width = 7,
#        height = 7*1.41*0.85*0.5,
#        units = "in")

Meta-references per manuscript section

x <-
  df_references %>% 
  distinct(article_id,
           meta_ref_id,
           meta_ref_section_adj,
           .keep_all = T) %>%
  left_join(df_articles %>% 
              dplyr::select(article_id,
                            article_type)) %>%
  filter(is_meta_ref_meta_analysis,
         !is.na(meta_ref_section_adj),
         meta_ref_section_adj != "other",
         article_type != "invalid") %>%
  count(article_type,
        meta_ref_section_adj) %>% 
  pivot_wider(names_from = meta_ref_section_adj,
              values_from = n) %>% 
  mutate(methods = replace_na(methods, 0),
         total = introduction + methods + `results/discussion/conclusion`,
         prop_intro = introduction / total,
         prop_methods = methods / total,
         prop_res = `results/discussion/conclusion` / total)
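
A quick sanity check (sketch, not part of the original script): the three section proportions should sum to one within each article type.

# prop_intro + prop_methods + prop_res should equal 1 in every row
x %>% 
  mutate(prop_sum = prop_intro + prop_methods + prop_res) %>% 
  dplyr::select(article_type, prop_sum)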

Model for introduction section

glm_introduction <-
  x %>% 
  glm(data = .,
      cbind(introduction,
            total - introduction) ~ 
        article_type - 1,
      family = "binomial")

summary(glht(glm_introduction,
             linfct = mcp(article_type = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = cbind(introduction, total - introduction) ~ article_type - 
##     1, family = "binomial", data = .)
## 
## Linear Hypotheses:
##                                       Estimate Std. Error z value Pr(>|z|)    
## Meta-analytical - Empirical == 0      -0.80657    0.16750  -4.816   <0.001 ***
## Methodological - Empirical == 0       -0.20044    0.47630  -0.421   0.9979    
## Other - Empirical == 0                -0.45175    0.50853  -0.888   0.9386    
## Review - Empirical == 0               -0.03054    0.27005  -0.113   1.0000    
## Theoretical - Empirical == 0          -0.11343    0.42294  -0.268   0.9998    
## Methodological - Meta-analytical == 0  0.60614    0.49562   1.223   0.7972    
## Other - Meta-analytical == 0           0.35482    0.52667   0.674   0.9812    
## Review - Meta-analytical == 0          0.77603    0.30283   2.563   0.0896 .  
## Theoretical - Meta-analytical == 0     0.69315    0.44459   1.559   0.5832    
## Other - Methodological == 0           -0.25131    0.69007  -0.364   0.9989    
## Review - Methodological == 0           0.16990    0.53899   0.315   0.9995    
## Theoretical - Methodological == 0      0.08701    0.62965   0.138   1.0000    
## Review - Other == 0                    0.42121    0.56767   0.742   0.9712    
## Theoretical - Other == 0               0.33833    0.65438   0.517   0.9944    
## Theoretical - Review == 0             -0.08289    0.49247  -0.168   1.0000    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
cld(glht(glm_introduction,
         linfct = mcp(article_type = "Tukey")),
    test = adjusted("none"),
    letters = Letters)
##       Empirical Meta-analytical  Methodological           Other          Review 
##             "a"             "b"            "ab"            "ab"            "ab" 
##     Theoretical 
##            "ab"

Model for methods section

glm_methods <-
  x %>% 
  glm(data = .,
      cbind(methods,
            total - methods) ~ 
        article_type - 1,
      family = "binomial")

summary(glht(glm_methods,
             linfct = mcp(article_type = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = cbind(methods, total - methods) ~ article_type - 
##     1, family = "binomial", data = .)
## 
## Linear Hypotheses:
##                                         Estimate Std. Error z value Pr(>|z|)
## Meta-analytical - Empirical == 0          1.6534     0.2186   7.565   <1e-04
## Methodological - Empirical == 0           1.4639     0.5841   2.506   0.0861
## Other - Empirical == 0                    0.7707     0.7689   1.002   0.8855
## Review - Empirical == 0                  -0.2101     0.6090  -0.345   0.9990
## Theoretical - Empirical == 0            -23.1669 52786.2621   0.000   1.0000
## Methodological - Meta-analytical == 0    -0.1895     0.5911  -0.321   0.9993
## Other - Meta-analytical == 0             -0.8826     0.7742  -1.140   0.8185
## Review - Meta-analytical == 0            -1.8635     0.6158  -3.026   0.0197
## Theoretical - Meta-analytical == 0      -24.8202 52786.2621   0.000   1.0000
## Other - Methodological == 0              -0.6931     0.9449  -0.734   0.9676
## Review - Methodological == 0             -1.6740     0.8201  -2.041   0.2477
## Theoretical - Methodological == 0       -24.6307 52786.2621   0.000   1.0000
## Review - Other == 0                      -0.9808     0.9605  -1.021   0.8774
## Theoretical - Other == 0                -23.9376 52786.2621   0.000   1.0000
## Theoretical - Review == 0               -22.9568 52786.2621   0.000   1.0000
##                                          
## Meta-analytical - Empirical == 0      ***
## Methodological - Empirical == 0       .  
## Other - Empirical == 0                   
## Review - Empirical == 0                  
## Theoretical - Empirical == 0             
## Methodological - Meta-analytical == 0    
## Other - Meta-analytical == 0             
## Review - Meta-analytical == 0         *  
## Theoretical - Meta-analytical == 0       
## Other - Methodological == 0              
## Review - Methodological == 0             
## Theoretical - Methodological == 0        
## Review - Other == 0                      
## Theoretical - Other == 0                 
## Theoretical - Review == 0                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
cld(glht(glm_methods,
         linfct = mcp(article_type = "Tukey")),
    test = adjusted("none"),
    letters = Letters)
##       Empirical Meta-analytical  Methodological           Other          Review 
##             "a"             "b"            "ab"            "ab"             "a" 
##     Theoretical 
##            "ab"

Model for results/discussion/conclusion section

glm_discussion <-
  x %>% 
  glm(data = .,
      cbind(`results/discussion/conclusion`,
            total - `results/discussion/conclusion`) ~ 
        article_type - 1,
      family = "binomial")

cld(glht(glm_discussion,
         linfct = mcp(article_type = "Tukey")),
    test = adjusted("none"),
    letters = Letters)
##       Empirical Meta-analytical  Methodological           Other          Review 
##             "a"             "a"             "a"             "a"             "a" 
##     Theoretical 
##             "a"
summary(glht(glm_discussion,
             linfct = mcp(article_type = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = cbind(`results/discussion/conclusion`, total - 
##     `results/discussion/conclusion`) ~ article_type - 1, family = "binomial", 
##     data = .)
## 
## Linear Hypotheses:
##                                        Estimate Std. Error z value Pr(>|z|)
## Meta-analytical - Empirical == 0       0.009737   0.165243   0.059    1.000
## Methodological - Empirical == 0       -0.500036   0.530809  -0.942    0.922
## Other - Empirical == 0                 0.204161   0.508727   0.401    0.998
## Review - Empirical == 0                0.078181   0.273994   0.285    1.000
## Theoretical - Empirical == 0           0.368464   0.423175   0.871    0.943
## Methodological - Meta-analytical == 0 -0.509772   0.547169  -0.932    0.925
## Other - Meta-analytical == 0           0.194425   0.525774   0.370    0.999
## Review - Meta-analytical == 0          0.068445   0.304481   0.225    1.000
## Theoretical - Meta-analytical == 0     0.358728   0.443523   0.809    0.958
## Other - Methodological == 0            0.704197   0.728623   0.966    0.914
## Review - Methodological == 0           0.578217   0.589204   0.981    0.908
## Theoretical - Methodological == 0      0.868500   0.671689   1.293    0.756
## Review - Other == 0                   -0.125980   0.569391  -0.221    1.000
## Theoretical - Other == 0               0.164303   0.654378   0.251    1.000
## Theoretical - Review == 0              0.290283   0.494450   0.587    0.990
## (Adjusted p values reported -- single-step method)

Fig. 3

### fig 3a ----
fig_3a <- 
  df_references %>% 
  distinct(article_id,
           meta_ref_id,
           meta_ref_section_adj,
           .keep_all = T) %>%
  left_join(df_articles %>% 
              dplyr::select(article_id,
                            article_type)) %>%
  filter(is_meta_ref_meta_analysis,
         !is.na(meta_ref_section_adj),
         meta_ref_section_adj != "other",
         article_type != "invalid") %>%
  count(article_type,
        meta_ref_section_adj) %>% 
  group_by(article_type) %>%
  mutate(prop = n / sum(n)) %>% 
  ggplot(aes(x = article_type,
             y = prop,
             group = meta_ref_section_adj,
             fill = meta_ref_section_adj,
             label = n)) +
  geom_col() +
  geom_label(hjust = 0.5,
             position = position_stack(vjust = 0.5),
             col = "black",
             fill = "white",
             label.r = unit(0.4, "lines"),
             label.padding = unit(0.15, 
                                  "lines"),
             size = 3.5) +
  scale_fill_manual(values = c("grey90", 
                               "grey60", 
                               "grey30")) +
  scale_y_continuous(breaks = seq(0, 
                                  1, 
                                  by = 0.2),
                     limits = c(0, 1)) +
  labs(pattern = "Section",
       pattern_angle = "Section",
       pattern_density = "Section",
       pattern_spacing = "Section",
       fill = "Section",
       x = "Article type",
       y = "Proportion of articles") +
  theme_classic() +
  theme(axis.title.x = element_text(margin = margin(t = 0.3, 
                                                    unit = "cm"),
                                    size = 12),
        axis.title.y = element_text(margin = margin(r = 0.3, 
                                                    unit = "cm"),
                                    size = 12),
        axis.text = element_text(size = 9),
        legend.title = element_text(size = 12),
        legend.text = element_text(size = 9),
        legend.position = "top",
        plot.tag.position = c(0.13, 
                              0.93))


### fig 3b ----
res_quotes <- 
  df_quotes %>%
  count(reports_any_result,
        reports_quantitative_result,
        reports_limitation) %>% 
  mutate(prop = n/nrow(df_quotes))
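
Because the three flags are logical, the proportions plotted in Fig. 3b can also be obtained directly as column means of df_quotes (a sketch, not part of the original script; assumes the flags contain no missing values):

# proportion of quotations reporting any result, a quantitative result,
# or a limitation, computed as the mean of each logical column
df_quotes %>% 
  summarise(across(c(reports_any_result,
                     reports_quantitative_result,
                     reports_limitation),
                   mean))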

fig_3b <-
  res_quotes %>%
  filter(reports_any_result) %>% 
  summarise(n = sum(n),
            value = sum(prop)) %>% 
  bind_rows(res_quotes %>%
              filter(reports_quantitative_result) %>% 
              summarise(n = sum(n),
                        value = sum(prop))) %>% 
  bind_rows(res_quotes %>%
              filter(reports_limitation) %>% 
              summarise(n = sum(n),
                        value = sum(prop))) %>% 
  mutate(x = factor(c("Any result",
                      "Quantitative result",
                      "Limitation"),
                    levels = c("Any result",
                               "Quantitative result",
                               "Limitation"))) %>% 
  ggplot(aes(x = x,
             y = value,
             label = n)) +
  geom_col(fill = "black") +
  geom_label(hjust = 0.5,
             position = position_stack(vjust = 0.5),
             col = "black",
             fill = "white",
             label.r = unit(0.4,
                            "lines"),
             label.padding = unit(0.15, 
                                  "lines"),
             size = 3.5) +
  scale_y_continuous(breaks = seq(0, 
                                  1, 
                                  by = 0.2),
                     limits = c(0, 1)) +
  labs(x = "Content type",
       y = "Proportion of quotations") +
  theme_classic() +
  theme(axis.title.x = element_text(margin = margin(t = 0.3, unit = "cm"),
                                    size = 12),
        axis.title.y = element_text(margin = margin(r = 0.3, unit = "cm"),
                                    size = 12),
        axis.text = element_text(size = 9),
        plot.tag.position = c(0.13, 0.95))

### joined figures ----
(fig_3 <-
   fig_3a +
   fig_3b +
   plot_layout(nrow = 2) +
   plot_annotation(tag_levels = "A"))

# ggsave("fig_3.jpg",
#        fig_3,
#        bg = "white",
#        dpi = 600,
#        width = 7,
#        height = 7*1.41*0.6,
#        units = "in")

Quote content

Proportion of quotes on meta-analyses’ results

df_quotes %>% 
  count(reports_any_result,
        reports_quantitative_result) %>% 
  mutate(prop = n/nrow(df_quotes))
## # A tibble: 3 × 4
##   reports_any_result reports_quantitative_result     n   prop
##   <lgl>              <lgl>                       <int>  <dbl>
## 1 FALSE              FALSE                         377 0.229 
## 2 TRUE               FALSE                        1196 0.727 
## 3 TRUE               TRUE                           73 0.0443

Proportion of quotes on meta-analyses’ limitations

## proportion of quotes about limitation ----
df_quotes %>% 
  count(reports_limitation) %>% 
  mutate(prop = n/nrow(df_quotes))
## # A tibble: 2 × 3
##   reports_limitation     n   prop
##   <lgl>              <int>  <dbl>
## 1 FALSE               1601 0.973 
## 2 TRUE                  45 0.0273

Efficiency of detecting meta-references

df_articles %>%
  filter(manually_verified) %>%
  mutate(auto_meta = str_detect(title,
                                "meta-an|metaan|meta-reg|metareg")) %>% 
  count(auto_meta,
        article_type)
## # A tibble: 9 × 3
##   auto_meta article_type        n
##   <lgl>     <fct>           <int>
## 1 FALSE     Empirical         499
## 2 FALSE     Invalid             7
## 3 FALSE     Meta-analytical    25
## 4 FALSE     Methodological     16
## 5 FALSE     Other              17
## 6 FALSE     Review             82
## 7 FALSE     Theoretical        17
## 8 TRUE      Meta-analytical    22
## 9 TRUE      Methodological      1
tp <- 22
fn <- 25
fp <- 1
tn <- 638
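
The four cells above are read off the count table; as a sketch (the names df_conf and is_meta are illustrative, and article titles are assumed to have no missing values), they could equally be derived programmatically:

# confusion matrix of the title-based detection (prediction) against the
# manual classification (ground truth: article_type == "Meta-analytical")
df_conf <- 
  df_articles %>% 
  filter(manually_verified) %>% 
  mutate(auto_meta = str_detect(title,
                                "meta-an|metaan|meta-reg|metareg"),
         is_meta = article_type == "Meta-analytical")

tp <- sum(df_conf$auto_meta & df_conf$is_meta)    # detected and truly meta-analytical
fp <- sum(df_conf$auto_meta & !df_conf$is_meta)   # detected but not meta-analytical
fn <- sum(!df_conf$auto_meta & df_conf$is_meta)   # missed meta-analytical articles
tn <- sum(!df_conf$auto_meta & !df_conf$is_meta)  # correctly left undetected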

Precision

#### precision ----
tp/(tp+fp)
## [1] 0.9565217

Sensitivity

#### sensitivity ----
tp/(tp+fn)
## [1] 0.4680851

Accuracy

#### accuracy ----
(tp+tn)/(tp+fp+tn+fn)
## [1] 0.9620991