if (!require("pacman")) {install.packages("pacman")}
pacman::p_load(broom,
broom.mixed,
emmeans,
ggtext,
janitor,
lme4,
multcomp,
multcompView,
patchwork,
tidyverse)
# load data ----
# Article-level table; article types are sentence-cased and stored as a factor.
df_articles <-
  "per_article.csv" %>%
  read_csv() %>%
  mutate(article_type = article_type %>%
           str_to_sentence() %>%
           as.factor())
# Reference-level and quotation-level tables, read without modification.
df_references <- "per_reference.csv" %>% read_csv()
df_quotes <- "per_quote.csv" %>% read_csv()
## # A tibble: 2 × 2
## `n_all_refs_lens == 0` n
## <lgl> <int>
## 1 FALSE 15752
## 2 TRUE 1393
# Among articles with at least one reference, count how many have at least
# one automatically detected meta-reference and express that as a share of
# all such articles.
df_articles %>%
  filter(n_all_refs_lens > 0) %>%
  count(detected_meta = n_meta_refs_auto > 0) %>%
  mutate(prop = n / sum(n))
## # A tibble: 2 × 3
## detected_meta n prop
## <lgl> <int> <dbl>
## 1 FALSE 12414 0.788
## 2 TRUE 3338 0.212
# Mean and standard error of the per-article proportion of automatically
# detected meta-references, restricted to articles with a non-zero reference
# count.
df_articles %>%
  filter(n_all_refs_lens != 0) %>%
  mutate(prop_meta_refs = n_meta_refs_auto / n_all_refs_lens) %>%
  summarise(mean_prop = mean(prop_meta_refs),
            # n() is the number of rows that passed the filter above
            se_prop = sd(prop_meta_refs) / sqrt(n()))
## # A tibble: 1 × 2
## mean_prop se_prop
## <dbl> <dbl>
## 1 0.00618 0.000157
# Ratio of true meta-references to manually checked meta-references, pooled
# over all articles included in the analysis.
df_articles %>%
  filter(include_analysis) %>%
  summarise(n_true = sum(n_true_meta_refs),
            n_manual = sum(n_meta_refs_manual)) %>%
  transmute(prop = n_true / n_manual)
## # A tibble: 1 × 1
## prop
## <dbl>
## 1 0.836
# Negative-binomial GLMM for the number of true meta-references per article,
# fitted on the articles flagged `include_analysis`.
# `article_type - 1` removes the intercept so every article type gets its own
# coefficient; `source_title` enters as a random intercept.
# The pipe form with `data = .` is kept on purpose: the fitted object stores
# this call, so rewriting it would change what downstream tools see.
glmer_n_true_neg_bin <-
df_articles %>%
filter(include_analysis) %>%
glmer.nb(data = .,
n_true_meta_refs ~
article_type - 1 +
(1|source_title))
# All pairwise (Tukey) contrasts between article types.
summary(glht(glmer_n_true_neg_bin,
linfct = mcp(article_type = "Tukey")))
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: lme4::glmer(formula = n_true_meta_refs ~ article_type - 1 + (1 |
## source_title), data = ., family = MASS::negative.binomial(theta = 71468.4760023709))
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## Meta-analytical - Empirical == 0 0.68456 0.09807 6.980 < 0.001 ***
## Methodological - Empirical == 0 -0.17519 0.23145 -0.757 0.96904
## Other - Empirical == 0 0.10385 0.21195 0.490 0.99569
## Review - Empirical == 0 0.09367 0.09682 0.967 0.91471
## Theoretical - Empirical == 0 -0.23794 0.23239 -1.024 0.89382
## Methodological - Meta-analytical == 0 -0.85975 0.24532 -3.505 0.00469 **
## Other - Meta-analytical == 0 -0.58071 0.22789 -2.548 0.09346 .
## Review - Meta-analytical == 0 -0.59089 0.12551 -4.708 < 0.001 ***
## Theoretical - Meta-analytical == 0 -0.92250 0.24526 -3.761 0.00198 **
## Other - Methodological == 0 0.27904 0.30926 0.902 0.93541
## Review - Methodological == 0 0.26886 0.24524 1.096 0.86321
## Theoretical - Methodological == 0 -0.06275 0.32312 -0.194 0.99995
## Review - Other == 0 -0.01018 0.22722 -0.045 1.00000
## Theoretical - Other == 0 -0.34179 0.30921 -1.105 0.85887
## Theoretical - Review == 0 -0.33161 0.24651 -1.345 0.72683
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
# Compact letter display for the pairwise article-type comparisons.
# cld() on a glht object summarises it with the default single-step
# adjustment (level 0.05) and does not forward extra arguments, so the
# formerly supplied `test = adjusted("none")` and `letters = Letters` had no
# effect. They are dropped here; the resulting letters are unchanged.
cld(glht(glmer_n_true_neg_bin,
         linfct = mcp(article_type = "Tukey")))
## Empirical Meta-analytical Methodological Other Review
## "a" "b" "a" "ab" "a"
## Theoretical
## "a"
# Negative-binomial GLMM for the total number of references per article
# (`n_all_refs_wos`), fitted on the articles flagged `include_analysis`.
# Same structure as the model for true meta-references: one coefficient per
# article type (no intercept) and a random intercept for `source_title`.
# The pipe form with `data = .` is kept because the fitted object stores
# this call.
glmer_n_all_neg_bin <-
df_articles %>%
filter(include_analysis) %>%
glmer.nb(data = .,
n_all_refs_wos ~
article_type - 1 +
(1|source_title))
# All pairwise (Tukey) contrasts between article types.
summary(glht(glmer_n_all_neg_bin,
linfct = mcp(article_type = "Tukey")))
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: lme4::glmer(formula = n_all_refs_wos ~ article_type - 1 + (1 |
## source_title), data = ., family = MASS::negative.binomial(theta = 8.35756459311107))
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## Meta-analytical - Empirical == 0 0.281716 0.062223 4.528 <0.001 ***
## Methodological - Empirical == 0 0.009627 0.099975 0.096 1.0000
## Other - Empirical == 0 0.094336 0.103710 0.910 0.9349
## Review - Empirical == 0 0.464147 0.048824 9.507 <0.001 ***
## Theoretical - Empirical == 0 -0.036831 0.098903 -0.372 0.9989
## Methodological - Meta-analytical == 0 -0.272089 0.114548 -2.375 0.1460
## Other - Meta-analytical == 0 -0.187380 0.118026 -1.588 0.5730
## Review - Meta-analytical == 0 0.182431 0.074145 2.460 0.1194
## Theoretical - Meta-analytical == 0 -0.318548 0.112232 -2.838 0.0446 *
## Other - Methodological == 0 0.084709 0.141819 0.597 0.9895
## Review - Methodological == 0 0.454520 0.108474 4.190 <0.001 ***
## Theoretical - Methodological == 0 -0.046459 0.136515 -0.340 0.9993
## Review - Other == 0 0.369810 0.111623 3.313 0.0103 *
## Theoretical - Other == 0 -0.131168 0.140351 -0.935 0.9275
## Theoretical - Review == 0 -0.500978 0.107703 -4.651 <0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
# Compact letter display for the pairwise article-type comparisons.
# cld() on a glht object summarises it with the default single-step
# adjustment (level 0.05) and does not forward extra arguments, so the
# formerly supplied `test = adjusted("none")` and `letters = Letters` had no
# effect. They are dropped here; the resulting letters are unchanged.
cld(glht(glmer_n_all_neg_bin,
         linfct = mcp(article_type = "Tukey")))
## Empirical Meta-analytical Methodological Other Review
## "a" "bc" "ac" "ac" "b"
## Theoretical
## "a"
# Binomial GLMM for the proportion of true meta-references among all
# references. The response is the two-column (successes, failures) matrix
# built with cbind(): true meta-references vs. all remaining references.
# One coefficient per article type (no intercept) and a random intercept for
# `source_title`. The pipe form with `data = .` is kept because the fitted
# object stores this call.
glmer_rel_true <-
df_articles %>%
filter(include_analysis) %>%
glmer(data = .,
cbind(n_true_meta_refs,
n_all_refs_wos - n_true_meta_refs) ~
article_type - 1 +
(1|source_title),
family = "binomial")
# All pairwise (Tukey) contrasts between article types.
summary(glht(glmer_rel_true,
linfct = mcp(article_type = "Tukey")))
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: glmer(formula = cbind(n_true_meta_refs, n_all_refs_wos - n_true_meta_refs) ~
## article_type - 1 + (1 | source_title), data = ., family = "binomial")
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## Meta-analytical - Empirical == 0 0.41973 0.10276 4.085 < 0.001 ***
## Methodological - Empirical == 0 -0.20019 0.23356 -0.857 0.94800
## Other - Empirical == 0 0.01374 0.21651 0.063 1.00000
## Review - Empirical == 0 -0.38418 0.10092 -3.807 0.00153 **
## Theoretical - Empirical == 0 -0.20263 0.23601 -0.859 0.94760
## Methodological - Meta-analytical == 0 -0.61992 0.24790 -2.501 0.10580
## Other - Meta-analytical == 0 -0.40599 0.23280 -1.744 0.46207
## Review - Meta-analytical == 0 -0.80391 0.13092 -6.141 < 0.001 ***
## Theoretical - Meta-analytical == 0 -0.62236 0.24870 -2.502 0.10524
## Other - Methodological == 0 0.21393 0.31364 0.682 0.98053
## Review - Methodological == 0 -0.18399 0.24849 -0.740 0.97206
## Theoretical - Methodological == 0 -0.00244 0.32515 -0.008 1.00000
## Review - Other == 0 -0.39792 0.23253 -1.711 0.48363
## Theoretical - Other == 0 -0.21637 0.31451 -0.688 0.97976
## Theoretical - Review == 0 0.18155 0.25118 0.723 0.97484
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
## article_type prob SE df asymp.LCL asymp.UCL
## Empirical 0.0193 0.000893 Inf 0.0176 0.0211
## Meta-analytical 0.0290 0.002870 Inf 0.0239 0.0352
## Methodological 0.0158 0.003620 Inf 0.0101 0.0247
## Other 0.0195 0.004110 Inf 0.0129 0.0294
## Review 0.0132 0.001280 Inf 0.0109 0.0160
## Theoretical 0.0158 0.003640 Inf 0.0100 0.0248
##
## Confidence level used: 0.95
## Intervals are back-transformed from the logit scale
# Compact letter display for the pairwise article-type comparisons.
# cld() on a glht object summarises it with the default single-step
# adjustment (level 0.05) and does not forward extra arguments, so the
# formerly supplied `test = adjusted("none")` and `letters = Letters` had no
# effect. They are dropped here; the resulting letters are unchanged.
cld(glht(glmer_rel_true,
         linfct = mcp(article_type = "Tukey")))
## Empirical Meta-analytical Methodological Other Review
## "a" "b" "abc" "abc" "c"
## Theoretical
## "abc"
# fig 2a ----
# Boxplots of the number of true meta-references per article type, overlaid
# with the exponentiated coefficients of `glmer_n_true_neg_bin` (red points),
# significance letters below the boxes and group sizes above them.
fig_2a <-
  df_articles %>%
  filter(include_analysis) %>%
  ggplot(aes(x = article_type,
             y = n_true_meta_refs)) +
  # spelled-out FALSE: `F` is an ordinary variable that can be reassigned
  geom_boxplot(outliers = FALSE) +
  # geom_jitter(height = 0,
  #             width = 0.2,
  #             alpha = 0.3) +
  geom_point(data = tidy(glmer_n_true_neg_bin) %>%
               mutate(corrected_est = exp(estimate),
                      term = str_remove(term,
                                        "article_type")) %>%
               # keep rows without a `group` value (presumably the fixed
               # effects; rows tied to a grouping factor are dropped)
               filter(is.na(group)),
             aes(x = term,
                 y = corrected_est),
             col = "red",
             size = 2) +
  # letters transcribed by hand from the cld() output for this model;
  # update them if the model or data change
  annotate("text",
           x = 1:6,
           y = -0.05,
           label = c("a",
                     "b",
                     "a",
                     "ab",
                     "a",
                     "a")) +
  # number of analysed articles per article type
  annotate("text",
           x = 1:6,
           y = 13,
           label = paste0("N = ",
                          df_articles %>%
                            filter(include_analysis) %>%
                            count(article_type) %>%
                            pull(n))) +
  labs(x = "Article type",
       y = "Number of true meta-references") +
  scale_y_continuous(limits = c(-1,
                                14),
                     breaks = c(0,
                                5,
                                10)) +
  theme_classic() +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_text(margin = margin(r = 0.3,
                                                    unit = "cm"),
                                    size = 12),
        axis.text = element_text(size = 9),
        plot.tag.position = c(0.14,
                              0.95))
# fig 2b ----
# Boxplots of the total number of references per article type, overlaid with
# the exponentiated coefficients of `glmer_n_all_neg_bin` (red points) and
# significance letters below the boxes.
fig_2b <-
  df_articles %>%
  filter(include_analysis) %>%
  ggplot(aes(x = article_type,
             y = n_all_refs_wos)) +
  # spelled-out FALSE: `F` is an ordinary variable that can be reassigned
  geom_boxplot(outliers = FALSE) +
  # geom_jitter(height = 0,
  #             width = 0.2,
  #             alpha = 0.3) +
  geom_point(data = tidy(glmer_n_all_neg_bin) %>%
               mutate(corrected_est = exp(estimate),
                      term = str_remove(term,
                                        "article_type")) %>%
               # keep rows without a `group` value (presumably the fixed
               # effects; rows tied to a grouping factor are dropped)
               filter(is.na(group)),
             aes(x = term,
                 y = corrected_est),
             col = "red",
             size = 2) +
  # letters transcribed by hand from the cld() output for this model;
  # update them if the model or data change
  annotate("text",
           x = 1:6,
           y = -1,
           label = c("a",
                     "bc",
                     "ac",
                     "ac",
                     "b",
                     "a")) +
  labs(x = "Article type",
       y = "Total number of references") +
  scale_y_continuous(limits = c(-1,
                                270),
                     breaks = seq(0,
                                  500,
                                  100)) +
  theme_classic() +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_text(margin = margin(r = 0.3, unit = "cm"),
                                    size = 12),
        axis.text = element_text(size = 9),
        plot.tag.position = c(0.14, 0.95))
# fig 2c ----
# Boxplots of the per-article proportion of true meta-references, overlaid
# with the response-scale estimated marginal means of `glmer_rel_true`
# (red points) and significance letters below the boxes.
fig_2c <-
  df_articles %>%
  filter(include_analysis) %>%
  ggplot(aes(x = article_type,
             y = rel_true_meta_refs)) +
  # spelled-out FALSE: `F` is an ordinary variable that can be reassigned
  geom_boxplot(outliers = FALSE) +
  # geom_jitter(height = 0,
  #             width = 0.2,
  #             alpha = 0.3) +
  geom_point(data = as.data.frame(emmeans(glmer_rel_true,
                                          ~ article_type,
                                          type = "response")),
             aes(x = article_type,
                 y = prob),
             col = "red",
             size = 2) +
  # letters transcribed by hand from the cld() output for this model;
  # update them if the model or data change
  annotate("text",
           x = 1:6,
           y = -0.01,
           label = c("a",
                     "b",
                     rep("abc", 2),
                     "c",
                     "abc")) +
  labs(x = "Article type",
       y = "Proportion of true meta-references\namong all references") +
  scale_y_continuous(limits = c(-0.02,
                                0.09),
                     breaks = c(0,
                                0.05)) +
  theme_classic() +
  theme(axis.title.x = element_text(margin = margin(t = 0.3, unit = "cm"),
                                    size = 12),
        axis.title.y = element_text(margin = margin(r = 0.3, unit = "cm"),
                                    size = 12),
        axis.text = element_text(size = 9),
        plot.tag.position = c(0.14,
                              0.95))
# Stack the three panels in one column and tag them A, B, C; the outer
# parentheses make the assignment print the combined figure.
(fig_2 <-
   wrap_plots(fig_2a,
              fig_2b,
              fig_2c,
              nrow = 3) +
   plot_annotation(tag_levels = "A"))
# Binomial GLMM relating the proportion of true meta-references among all
# references to the journal impact factor.
# `jif_2022` is standardised with scale() before fitting; `source_title` and
# `article_type` both enter as random intercepts.
# The pipe form with `data = .` is kept because the fitted object stores
# this call.
glmer_jif_prop <-
df_articles %>%
filter(include_analysis) %>%
mutate(jif_2022_s = scale(jif_2022)) %>%
glmer(data = .,
cbind(n_true_meta_refs,
n_all_refs_wos - n_true_meta_refs) ~
jif_2022_s +
(1|source_title) +
(1|article_type),
family = "binomial")
# Print the model summary (fixed effect of the scaled impact factor).
summary(glmer_jif_prop)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: cbind(n_true_meta_refs, n_all_refs_wos - n_true_meta_refs) ~
## jif_2022_s + (1 | source_title) + (1 | article_type)
## Data: .
##
## AIC BIC logLik -2*log(L) df.resid
## 2021.2 2039.0 -1006.6 2013.2 641
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.7957 -0.4998 -0.1458 0.4313 5.7613
##
## Random effects:
## Groups Name Variance Std.Dev.
## source_title (Intercept) 0.06568 0.2563
## article_type (Intercept) 0.06345 0.2519
## Number of obs: 645, groups: source_title, 120; article_type, 6
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.97867 0.12412 -32.054 <2e-16 ***
## jif_2022_s 0.02581 0.04818 0.536 0.592
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## jif_2022_s 0.007
# Response-scale predictions of `glmer_jif_prop` on a grid of 100 equally
# spaced values spanning the observed range of the standardised impact
# factor, with the grid mapped back to the original impact-factor scale.
# local() keeps the helper vectors out of the global environment.
emmeans_df <- local({
  jif_raw <-
    df_articles %>%
    filter(include_analysis) %>%
    pull(jif_2022)
  jif_std <- scale(jif_raw)
  jif_grid <- seq(min(jif_std),
                  max(jif_std),
                  length.out = 100)

  emmeans(glmer_jif_prop,
          ~ jif_2022_s,
          at = list(jif_2022_s = jif_grid),
          type = "response") %>%
    as.data.frame() %>%
    tibble() %>%
    # undo the standardisation: x = z * sd + mean
    mutate(jif_2022 = (jif_2022_s * sd(jif_raw)) + mean(jif_raw))
})
## fig. s2 ----
# Per-article proportion of true meta-references against the journal impact
# factor (jittered points), with the GLMM prediction (solid red) and its
# confidence limits (dashed red) taken from `emmeans_df`.
# The model curves are drawn with geom_line(): the values in `emmeans_df` are
# already model predictions, so geom_smooth() would re-fit a second smoother
# to them and add that smoother's own standard-error band, which has nothing
# to do with the GLMM. linewidth = 1 keeps the line thickness geom_smooth()
# used by default.
(fig_s2 <-
   df_articles %>%
   filter(include_analysis) %>%
   ggplot(aes(x = jif_2022,
              y = rel_true_meta_refs)) +
   geom_jitter(height = 0,
               width = 0.2,
               alpha = 0.3) +
   geom_line(data = emmeans_df,
             aes(x = jif_2022,
                 y = prob),
             col = "red",
             linewidth = 1) +
   geom_line(data = emmeans_df,
             aes(x = jif_2022,
                 y = asymp.LCL),
             linetype = "dashed",
             col = "red",
             linewidth = 1) +
   geom_line(data = emmeans_df,
             aes(x = jif_2022,
                 y = asymp.UCL),
             linetype = "dashed",
             col = "red",
             linewidth = 1) +
   # no colour aesthetic is mapped, so no colour legend title is set
   labs(x = "Clarivate's 2022 journal impact factor",
        y = "Proportion of true meta references\namong all references") +
   theme_classic() +
   theme(axis.title.x = element_text(margin = margin(t = 0.3, unit = "cm"),
                                     size = 12),
         axis.title.y = element_text(margin = margin(r = 0.3, unit = "cm"),
                                     size = 12),
         axis.text = element_text(size = 9)))
# Per article type: number of distinct meta-analysis references cited in the
# introduction, the methods and the results/discussion/conclusion, plus the
# share of each section in the total.
x <-
  df_references %>%
  # one row per (article, meta-reference, section)
  distinct(article_id,
           meta_ref_id,
           meta_ref_section_adj,
           .keep_all = TRUE) %>%
  left_join(df_articles %>%
              dplyr::select(article_id,
                            article_type)) %>%
  filter(is_meta_ref_meta_analysis,
         !is.na(meta_ref_section_adj),
         meta_ref_section_adj != "other",
         # article types are sentence-cased when df_articles is loaded, so
         # the level to exclude is "Invalid"; the former lower-case
         # "invalid" never matched any row
         article_type != "Invalid") %>%
  count(article_type,
        meta_ref_section_adj) %>%
  # article types with no reference in a section get a count of 0 instead of
  # NA, for every section (previously only `methods` was zero-filled)
  pivot_wider(names_from = meta_ref_section_adj,
              values_from = n,
              values_fill = 0) %>%
  mutate(total = introduction + methods + `results/discussion/conclusion`,
         prop_intro = introduction / total,
         prop_methods = methods / total,
         prop_res = `results/discussion/conclusion` / total)
# Binomial GLM on the aggregated counts in `x`: meta-analysis references
# cited in the introduction vs. those cited in any other section, with one
# coefficient per article type (no intercept).
glm_introduction <-
x %>%
glm(data = .,
cbind(introduction,
total - introduction) ~
article_type - 1,
family = "binomial")
# All pairwise (Tukey) contrasts between article types.
summary(glht(glm_introduction,
linfct = mcp(article_type = "Tukey")))
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: glm(formula = cbind(introduction, total - introduction) ~ article_type -
## 1, family = "binomial", data = .)
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## Meta-analytical - Empirical == 0 -0.80657 0.16750 -4.816 <0.001 ***
## Methodological - Empirical == 0 -0.20044 0.47630 -0.421 0.9979
## Other - Empirical == 0 -0.45175 0.50853 -0.888 0.9386
## Review - Empirical == 0 -0.03054 0.27005 -0.113 1.0000
## Theoretical - Empirical == 0 -0.11343 0.42294 -0.268 0.9998
## Methodological - Meta-analytical == 0 0.60614 0.49562 1.223 0.7972
## Other - Meta-analytical == 0 0.35482 0.52667 0.674 0.9812
## Review - Meta-analytical == 0 0.77603 0.30283 2.563 0.0896 .
## Theoretical - Meta-analytical == 0 0.69315 0.44459 1.559 0.5832
## Other - Methodological == 0 -0.25131 0.69007 -0.364 0.9989
## Review - Methodological == 0 0.16990 0.53899 0.315 0.9995
## Theoretical - Methodological == 0 0.08701 0.62965 0.138 1.0000
## Review - Other == 0 0.42121 0.56767 0.742 0.9712
## Theoretical - Other == 0 0.33833 0.65438 0.517 0.9944
## Theoretical - Review == 0 -0.08289 0.49247 -0.168 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
# Compact letter display for the pairwise article-type comparisons.
# cld() on a glht object summarises it with the default single-step
# adjustment (level 0.05) and does not forward extra arguments, so the
# formerly supplied `test = adjusted("none")` and `letters = Letters` had no
# effect. They are dropped here; the resulting letters are unchanged.
cld(glht(glm_introduction,
         linfct = mcp(article_type = "Tukey")))
## Empirical Meta-analytical Methodological Other Review
## "a" "b" "ab" "ab" "ab"
## Theoretical
## "ab"
# Binomial GLM on the aggregated counts in `x`: meta-analysis references
# cited in the methods vs. those cited in any other section, with one
# coefficient per article type (no intercept).
# NOTE(review): the printed contrasts involving "Theoretical" have extremely
# large standard errors; this presumably reflects a zero methods count for
# that article type -- confirm and interpret those rows with care.
glm_methods <-
x %>%
glm(data = .,
cbind(methods,
total - methods) ~
article_type - 1,
family = "binomial")
# All pairwise (Tukey) contrasts between article types.
summary(glht(glm_methods,
linfct = mcp(article_type = "Tukey")))
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: glm(formula = cbind(methods, total - methods) ~ article_type -
## 1, family = "binomial", data = .)
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## Meta-analytical - Empirical == 0 1.6534 0.2186 7.565 <1e-04
## Methodological - Empirical == 0 1.4639 0.5841 2.506 0.0861
## Other - Empirical == 0 0.7707 0.7689 1.002 0.8855
## Review - Empirical == 0 -0.2101 0.6090 -0.345 0.9990
## Theoretical - Empirical == 0 -23.1669 52786.2621 0.000 1.0000
## Methodological - Meta-analytical == 0 -0.1895 0.5911 -0.321 0.9993
## Other - Meta-analytical == 0 -0.8826 0.7742 -1.140 0.8185
## Review - Meta-analytical == 0 -1.8635 0.6158 -3.026 0.0197
## Theoretical - Meta-analytical == 0 -24.8202 52786.2621 0.000 1.0000
## Other - Methodological == 0 -0.6931 0.9449 -0.734 0.9676
## Review - Methodological == 0 -1.6740 0.8201 -2.041 0.2477
## Theoretical - Methodological == 0 -24.6307 52786.2621 0.000 1.0000
## Review - Other == 0 -0.9808 0.9605 -1.021 0.8774
## Theoretical - Other == 0 -23.9376 52786.2621 0.000 1.0000
## Theoretical - Review == 0 -22.9568 52786.2621 0.000 1.0000
##
## Meta-analytical - Empirical == 0 ***
## Methodological - Empirical == 0 .
## Other - Empirical == 0
## Review - Empirical == 0
## Theoretical - Empirical == 0
## Methodological - Meta-analytical == 0
## Other - Meta-analytical == 0
## Review - Meta-analytical == 0 *
## Theoretical - Meta-analytical == 0
## Other - Methodological == 0
## Review - Methodological == 0
## Theoretical - Methodological == 0
## Review - Other == 0
## Theoretical - Other == 0
## Theoretical - Review == 0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
# Compact letter display for the pairwise article-type comparisons.
# cld() on a glht object summarises it with the default single-step
# adjustment (level 0.05) and does not forward extra arguments, so the
# formerly supplied `test = adjusted("none")` and `letters = Letters` had no
# effect. They are dropped here; the resulting letters are unchanged.
cld(glht(glm_methods,
         linfct = mcp(article_type = "Tukey")))
## Empirical Meta-analytical Methodological Other Review
## "a" "b" "ab" "ab" "a"
## Theoretical
## "ab"
# Binomial GLM on the aggregated counts in `x`: meta-analysis references
# cited in the results/discussion/conclusion vs. those cited in any other
# section, with one coefficient per article type (no intercept).
glm_discussion <-
x %>%
glm(data = .,
cbind(`results/discussion/conclusion`,
total - `results/discussion/conclusion`) ~
article_type - 1,
family = "binomial")
# Compact letter display for the pairwise article-type comparisons.
# cld() on a glht object summarises it with the default single-step
# adjustment (level 0.05) and does not forward extra arguments, so the
# formerly supplied `test = adjusted("none")` and `letters = Letters` had no
# effect. They are dropped here; the resulting letters are unchanged.
cld(glht(glm_discussion,
         linfct = mcp(article_type = "Tukey")))
## Empirical Meta-analytical Methodological Other Review
## "a" "a" "a" "a" "a"
## Theoretical
## "a"
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: glm(formula = cbind(`results/discussion/conclusion`, total -
## `results/discussion/conclusion`) ~ article_type - 1, family = "binomial",
## data = .)
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## Meta-analytical - Empirical == 0 0.009737 0.165243 0.059 1.000
## Methodological - Empirical == 0 -0.500036 0.530809 -0.942 0.922
## Other - Empirical == 0 0.204161 0.508727 0.401 0.998
## Review - Empirical == 0 0.078181 0.273994 0.285 1.000
## Theoretical - Empirical == 0 0.368464 0.423175 0.871 0.943
## Methodological - Meta-analytical == 0 -0.509772 0.547169 -0.932 0.925
## Other - Meta-analytical == 0 0.194425 0.525774 0.370 0.999
## Review - Meta-analytical == 0 0.068445 0.304481 0.225 1.000
## Theoretical - Meta-analytical == 0 0.358728 0.443523 0.809 0.958
## Other - Methodological == 0 0.704197 0.728623 0.966 0.914
## Review - Methodological == 0 0.578217 0.589204 0.981 0.908
## Theoretical - Methodological == 0 0.868500 0.671689 1.293 0.756
## Review - Other == 0 -0.125980 0.569391 -0.221 1.000
## Theoretical - Other == 0 0.164303 0.654378 0.251 1.000
## Theoretical - Review == 0 0.290283 0.494450 0.587 0.990
## (Adjusted p values reported -- single-step method)
### fig 3a ----
# Stacked bars: for each article type, the share of distinct meta-analysis
# references found in each manuscript section; the labels show the counts.
fig_3a <-
  df_references %>%
  # one row per (article, meta-reference, section)
  distinct(article_id,
           meta_ref_id,
           meta_ref_section_adj,
           .keep_all = TRUE) %>%
  left_join(df_articles %>%
              dplyr::select(article_id,
                            article_type)) %>%
  filter(is_meta_ref_meta_analysis,
         !is.na(meta_ref_section_adj),
         meta_ref_section_adj != "other",
         # article types are sentence-cased when df_articles is loaded, so
         # the level to exclude is "Invalid"; the former lower-case
         # "invalid" never matched any row
         article_type != "Invalid") %>%
  count(article_type,
        meta_ref_section_adj) %>%
  group_by(article_type) %>%
  mutate(prop = n / sum(n)) %>%
  ggplot(aes(x = article_type,
             y = prop,
             group = meta_ref_section_adj,
             fill = meta_ref_section_adj,
             label = n)) +
  geom_col() +
  geom_label(hjust = 0.5,
             position = position_stack(vjust = 0.5),
             col = "black",
             fill = "white",
             label.r = unit(0.4, "lines"),
             label.padding = unit(0.15,
                                  "lines"),
             size = 3.5) +
  scale_fill_manual(values = c("grey90",
                               "grey60",
                               "grey30")) +
  scale_y_continuous(breaks = seq(0,
                                  1,
                                  by = 0.2),
                     limits = c(0, 1)) +
  # only `fill` is a mapped aesthetic; the former pattern_* labels referred
  # to aesthetics that are not used in this plot
  labs(fill = "Section",
       x = "Article type",
       y = "Proportion of articles") +
  theme_classic() +
  theme(axis.title.x = element_text(margin = margin(t = 0.3,
                                                    unit = "cm"),
                                    size = 12),
        axis.title.y = element_text(margin = margin(r = 0.3,
                                                    unit = "cm"),
                                    size = 12),
        axis.text = element_text(size = 9),
        legend.title = element_text(size = 12),
        legend.text = element_text(size = 9),
        legend.position = "top",
        plot.tag.position = c(0.13,
                              0.93))
### fig 3b ----
# Counts and shares of quotations for every combination of the three
# content flags.
res_quotes <-
  df_quotes %>%
  count(reports_any_result,
        reports_quantitative_result,
        reports_limitation) %>%
  mutate(prop = n / nrow(df_quotes))
# Bar chart with one bar per content flag: total count (label) and share of
# all quotations (bar height) for which the flag is TRUE. The flags are not
# mutually exclusive, so each bar is summarised separately.
fig_3b <-
  bind_rows(res_quotes %>%
              filter(reports_any_result) %>%
              summarise(n = sum(n),
                        value = sum(prop)),
            res_quotes %>%
              filter(reports_quantitative_result) %>%
              summarise(n = sum(n),
                        value = sum(prop)),
            res_quotes %>%
              filter(reports_limitation) %>%
              summarise(n = sum(n),
                        value = sum(prop))) %>%
  # factor levels follow the order of the rows above
  mutate(x = fct_inorder(c("Any result",
                           "Quantitative result",
                           "Limitation"))) %>%
  ggplot(aes(x = x,
             y = value,
             label = n)) +
  geom_col(fill = "black") +
  geom_label(hjust = 0.5,
             position = position_stack(vjust = 0.5),
             col = "black",
             fill = "white",
             label.r = unit(0.4, "lines"),
             label.padding = unit(0.15, "lines"),
             size = 3.5) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2),
                     limits = c(0, 1)) +
  labs(x = "Content type",
       y = "Proportion of quotations") +
  theme_classic() +
  theme(axis.title.x = element_text(margin = margin(t = 0.3, unit = "cm"),
                                    size = 12),
        axis.title.y = element_text(margin = margin(r = 0.3, unit = "cm"),
                                    size = 12),
        axis.text = element_text(size = 9),
        plot.tag.position = c(0.13, 0.95))
### joined figures ----
# Stack both panels in one column and tag them A and B; the outer
# parentheses make the assignment print the combined figure.
(fig_3 <-
   wrap_plots(fig_3a,
              fig_3b,
              nrow = 2) +
   plot_annotation(tag_levels = "A"))
# Cross-tabulate quotations by whether they report any result and whether
# they report a quantitative result, with each cell's share of all
# quotations (the counts cover every row, so they sum to the total).
df_quotes %>%
  count(reports_any_result,
        reports_quantitative_result) %>%
  mutate(prop = n / sum(n))
## # A tibble: 3 × 4
## reports_any_result reports_quantitative_result n prop
## <lgl> <lgl> <int> <dbl>
## 1 FALSE FALSE 377 0.229
## 2 TRUE FALSE 1196 0.727
## 3 TRUE TRUE 73 0.0443
# For manually verified articles, tabulate whether the title matches the
# meta-analysis / meta-regression pattern against the assigned article type.
# NOTE(review): the pattern is case-sensitive -- confirm that `title` is
# lower-cased upstream, otherwise titles starting with "Meta-" are missed.
df_articles %>%
  filter(manually_verified) %>%
  count(auto_meta = str_detect(title,
                               "meta-an|metaan|meta-reg|metareg"),
        article_type)
## # A tibble: 9 × 3
## auto_meta article_type n
## <lgl> <fct> <int>
## 1 FALSE Empirical 499
## 2 FALSE Invalid 7
## 3 FALSE Meta-analytical 25
## 4 FALSE Methodological 16
## 5 FALSE Other 17
## 6 FALSE Review 82
## 7 FALSE Theoretical 17
## 8 TRUE Meta-analytical 22
## 9 TRUE Methodological 1